In [1]:
import pandas as pd
import tensorflow as tf
import joblib
import pickle
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from kerastuner.tuners import RandomSearch
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

  from kerastuner.tuners import RandomSearch


## Data Preparation: Scaling & Splitting

In [2]:
# Read the cleaned campaign data and discard the identifier column when it is present.
df = pd.read_csv("cleaned_data.csv").drop(columns=['Campaign_ID'], errors='ignore')

# Every column except the target is used as a feature; the target is Conversion_Rate.
# (Nothing else is filtered here -- only the target column is dropped.)
X = df.drop(columns=['Conversion_Rate'], errors='ignore')
y = df['Conversion_Rate']

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# 'Budget' is excluded from standardisation (per the original notes it was
# already scaled during preprocessing); all remaining columns are standardised.
columns_to_scale = [name for name in X_train.columns if name != "Budget"]

# Learn the scaling statistics from the training split only, so nothing from
# the test split influences the transform.
scaler = StandardScaler()
scaler.fit(X_train[columns_to_scale])

# Work on copies so the unscaled splits stay available and the DataFrame
# structure (index + column names) is preserved.
X_train_scaled = X_train.copy()
X_test_scaled = X_test.copy()
X_train_scaled[columns_to_scale] = scaler.transform(X_train[columns_to_scale])
X_test_scaled[columns_to_scale] = scaler.transform(X_test[columns_to_scale])

# Explicitly carry 'Budget' over unchanged from the unscaled splits.
X_train_scaled["Budget"] = X_train["Budget"]
X_test_scaled["Budget"] = X_test["Budget"]

## Train & Predict: Random Forest Model

In [3]:
# Fit a random forest (seeded for repeatable results) on the scaled training
# features, then predict the target for the scaled test features.
rf_model = RandomForestRegressor(random_state=42).fit(X_train_scaled, y_train)
rf_preds = rf_model.predict(X_test_scaled)

## Train & Predict: Linear Regression Model

In [4]:
# Fit an ordinary least-squares model on the scaled training features,
# then predict the target for the scaled test features.
lr_model = LinearRegression().fit(X_train_scaled, y_train)
lr_preds = lr_model.predict(X_test_scaled)

## Save Trained Models for Future Use

In [5]:
# Persist the fitted scaler together with the models: both estimators were
# trained on StandardScaler-transformed features, so reloading a model without
# the same fitted scaler would feed it inputs on the wrong scale.
joblib.dump(scaler, "feature_scaler.pkl")

# Save the models. The random-forest dump stays last so the value displayed by
# the cell (the list of written file names) is unchanged.
joblib.dump(lr_model, "linear_regression_model.pkl")
joblib.dump(rf_model, "random_forest_model.pkl")

['random_forest_model.pkl']

## Evaluate Model Performance using MSE & R² Score

In [6]:
# Score each model on the held-out test split: mean squared error and R².
lr_mse, lr_r2 = mean_squared_error(y_test, lr_preds), r2_score(y_test, lr_preds)
rf_mse, rf_r2 = mean_squared_error(y_test, rf_preds), r2_score(y_test, rf_preds)

# Report both models with six decimal places so the small MSE values stay legible.
print(f"Linear Regression - MSE: {lr_mse:.6f}, R²: {lr_r2:.6f}")
print(f"Random Forest - MSE: {rf_mse:.6f}, R²: {rf_r2:.6f}")

Linear Regression - MSE: 0.000265, R²: 0.227070
Random Forest - MSE: 0.000010, R²: 0.971208


## Create a DataFrame to Compare Actual vs Predicted Values

In [7]:
# One row per test sample: the true target next to each model's prediction.
results_df = pd.DataFrame(
    {
        'Actual': y_test,
        'Rf_Predicted': rf_preds,
        'LR_Predicted': lr_preds,
    }
)
results_df.head()

Unnamed: 0,Actual,Rf_Predicted,LR_Predicted
521,0.005081,0.005144,0.008676
737,0.006237,0.006036,0.001483
740,0.00333,0.003339,-0.012431
660,0.003238,0.003381,0.0071
411,0.00409,0.004311,0.000763


In [8]:
# Show the first five rows of the unscaled feature frame
X.head(n=5)

Unnamed: 0,Budget,Duration,Platform,Content_Type,Target_Age,Target_Gender,Region,Clicks,Conversions,CTR,CPC,Success
0,0.316727,0.631579,2,4,2,1,4,0.96937,0.42212,0.009574,0.000757,1
1,0.017074,0.087719,3,4,3,1,3,0.301657,0.467081,0.049584,0.00014,1
2,0.765414,0.894737,4,1,2,0,4,0.161619,0.547371,0.000663,0.010922,1
3,0.897313,0.438596,0,3,1,1,4,0.436487,0.855881,0.001526,0.004771,1
4,0.226219,0.578947,1,2,0,0,3,0.329934,0.296066,0.004564,0.001596,1


## Convert X_test into a DataFrame for Better Readability

In [9]:
# Test-split features, with columns laid out in the same order as X
df2 = pd.DataFrame(data=X_test, columns=X.columns)
df2.head()

Unnamed: 0,Budget,Duration,Platform,Content_Type,Target_Age,Target_Gender,Region,Clicks,Conversions,CTR,CPC,Success
521,0.196906,0.421053,4,0,2,0,4,0.846406,0.669008,0.013398,0.000539,1
737,0.818844,0.754386,1,2,1,2,1,0.513153,0.497591,0.001966,0.003705,1
740,0.831063,0.157895,4,4,0,2,4,0.662544,0.343035,0.002502,0.002913,1
660,0.030758,1.0,3,4,4,1,4,0.312699,0.156564,0.030041,0.000237,1
411,0.241368,0.982456,2,3,2,0,0,0.619042,0.393617,0.008014,0.000906,1


## Combine Features (X_test) with Actual and Predicted Values

In [10]:
# Place the actual/predicted columns next to the test features (column-wise join)
df3 = pd.concat(objs=[df2, results_df], axis="columns")
df3.head()

Unnamed: 0,Budget,Duration,Platform,Content_Type,Target_Age,Target_Gender,Region,Clicks,Conversions,CTR,CPC,Success,Actual,Rf_Predicted,LR_Predicted
521,0.196906,0.421053,4,0,2,0,4,0.846406,0.669008,0.013398,0.000539,1,0.005081,0.005144,0.008676
737,0.818844,0.754386,1,2,1,2,1,0.513153,0.497591,0.001966,0.003705,1,0.006237,0.006036,0.001483
740,0.831063,0.157895,4,4,0,2,4,0.662544,0.343035,0.002502,0.002913,1,0.00333,0.003339,-0.012431
660,0.030758,1.0,3,4,4,1,4,0.312699,0.156564,0.030041,0.000237,1,0.003238,0.003381,0.0071
411,0.241368,0.982456,2,3,2,0,0,0.619042,0.393617,0.008014,0.000906,1,0.00409,0.004311,0.000763


## Save Predictions to CSVs for Analysis

In [11]:
# Write the combined features + predictions frame to disk, omitting the row index
df3.to_csv(path_or_buf="test_predictions.csv", index=False)
print("Test predictions saved as 'test_predictions.csv'")

Test predictions saved as 'test_predictions.csv'


In [12]:
# Actual target vs. linear-regression prediction only, written without the row index
lr_results = pd.DataFrame({"Actual": y_test}).assign(Predicted=lr_preds)
lr_results.to_csv("lr_predictions.csv", index=False)

print("Linear Regression Predictions Saved as 'lr_predictions.csv'")

Linear Regression Predictions Saved as 'lr_predictions.csv'


## Format & Highlight Predictions for Better Visibility

In [13]:
# Reload the saved test-set predictions (features, Actual, and both models' predictions)
df3 = pd.read_csv("test_predictions.csv")

# Check available columns
print("Columns in df3:", df3.columns.tolist())

# Format numerical columns to 4 decimal places
numeric_cols = ["Clicks", "Conversions", "CTR", "CPC", "Actual", "Rf_Predicted", "LR_Predicted"]
df3[numeric_cols] = df3[numeric_cols].round(4)


def highlight_predictions(val, actual=None, tolerance=0.01):
    """Return a CSS background colour for one prediction cell.

    Green ("lightgreen") when the prediction is close to the actual value,
    red ("salmon") when it is far from it.

    Parameters
    ----------
    val : float
        The predicted value shown in the cell.
    actual : float, optional
        The true value for the same row. When omitted, ``val`` itself is
        compared against ``tolerance`` (the behaviour of the earlier
        single-argument version, kept for backward compatibility).
    tolerance : float, default 0.01
        Largest absolute error still considered "close".
        NOTE(review): 0.01 is carried over from the original threshold; the
        targets displayed by this notebook are around 0.003-0.006, so a tighter
        tolerance may be more informative -- confirm with the analysis owner.

    Returns
    -------
    str
        A ``background-color: ...`` CSS declaration.
    """
    error = val if actual is None else val - actual
    color = "lightgreen" if abs(error) < tolerance else "salmon"
    return f"background-color: {color}"


def _highlight_row(row):
    """Style one row: colour each prediction by its distance from 'Actual'; leave 'Actual' unstyled."""
    return [
        "" if col == "Actual" else highlight_predictions(row[col], actual=row["Actual"])
        for col in row.index
    ]


# Row-wise styling is required because each prediction must be compared with the
# Actual value of the same row; an element-wise mapper only ever sees one cell.
# 'Actual' is part of the subset only so the row function can read it.
styled_df = (
    df3.head()
    .style.set_properties(**{'text-align': 'center'})
    .set_table_styles(
        [{'selector': 'th', 'props': [('font-size', '12pt'), ('text-align', 'center')]}]
    )
    .apply(_highlight_row, axis=1, subset=["Actual", "Rf_Predicted", "LR_Predicted"])
)

# Display styled table in Jupyter Notebook
display(styled_df)

# Save the rounded frame to CSV. Despite the file name, it holds the features,
# the actual values, and BOTH models' predictions.
df3.to_csv("rf_predictions.csv", index=False)
print(" Predictions saved as 'rf_predictions.csv'")


Columns in df3: ['Budget', 'Duration', 'Platform', 'Content_Type', 'Target_Age', 'Target_Gender', 'Region', 'Clicks', 'Conversions', 'CTR', 'CPC', 'Success', 'Actual', 'Rf_Predicted', 'LR_Predicted']


  styled_df = df3.head().style.set_properties(**{'text-align': 'center'}).set_table_styles(


Unnamed: 0,Budget,Duration,Platform,Content_Type,Target_Age,Target_Gender,Region,Clicks,Conversions,CTR,CPC,Success,Actual,Rf_Predicted,LR_Predicted
0,0.196906,0.421053,4,0,2,0,4,0.8464,0.669,0.0134,0.0005,1,0.0051,0.0051,0.0087
1,0.818844,0.754386,1,2,1,2,1,0.5132,0.4976,0.002,0.0037,1,0.0062,0.006,0.0015
2,0.831063,0.157895,4,4,0,2,4,0.6625,0.343,0.0025,0.0029,1,0.0033,0.0033,-0.0124
3,0.030758,1.0,3,4,4,1,4,0.3127,0.1566,0.03,0.0002,1,0.0032,0.0034,0.0071
4,0.241368,0.982456,2,3,2,0,0,0.619,0.3936,0.008,0.0009,1,0.0041,0.0043,0.0008


 Predictions saved as 'rf_predictions.csv'
