In [1]:
from google.colab import drive

# Attach Google Drive to the Colab filesystem at the mount point below.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [2]:
pip install pandas scikit-learn



In [3]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline

# --- Feature configuration -------------------------------------------------
# Categorical columns are one-hot encoded by the preprocessor below; the
# numerical columns reach the regressor through remainder='passthrough'.
categorical_cols = ['ad_file_type', 'ad_size', 'goal_type', 'media_type', 'campaign_type']
numerical_cols = ['goal_value', 'max_bid', 'plan_impressions', 'daily_impression_cap', 'daily_estimated_budget']
feature_cols = categorical_cols + numerical_cols

# --- Data --------------------------------------------------------------------
campaign_data = pd.read_csv('/content/drive/MyDrive/Affinity task 2/campaign_performance.csv')
submission_form = pd.read_csv('/content/drive/MyDrive/Affinity task 2/c2_submission_form.csv')

# The training target is the 'ctr' column; the submission form supplies only
# the feature columns.
X_train, y_train = campaign_data[feature_cols], campaign_data['ctr']
X_test = submission_form[feature_cols]

# --- Model -------------------------------------------------------------------
# handle_unknown='ignore' lets the encoder accept categories at predict time
# that were not present when it was fitted.
categorical_encoder = OneHotEncoder(handle_unknown='ignore')
preprocessor = ColumnTransformer(
    [('cat', categorical_encoder, categorical_cols)],
    remainder='passthrough',
)

# Fixed random_state keeps the forest reproducible across runs.
regressor = RandomForestRegressor(n_estimators=100, random_state=42)
model = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', regressor),
])
model.fit(X_train, y_train)

# --- Predict and export --------------------------------------------------------
# Predictions are attached to the submission frame as a new column and the
# whole frame is written out without the index.
predicted_ctr = model.predict(X_test)
submission_form['predicted_ctr'] = predicted_ctr
submission_form.to_csv('c2_submission_form_with_predictions.csv', index=False)

print("CTR predictions saved to 'c2_submission_form_with_predictions.csv'")


CTR predictions saved to 'c2_submission_form_with_predictions.csv'


In [8]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import r2_score, mean_squared_error

# --- Feature configuration -------------------------------------------------
# Categorical columns are one-hot encoded by the preprocessor below; the
# numerical columns reach the regressor through remainder='passthrough'.
categorical_cols = ['ad_file_type', 'ad_size', 'goal_type', 'media_type', 'campaign_type']
numerical_cols = ['goal_value', 'max_bid', 'plan_impressions', 'daily_impression_cap', 'daily_estimated_budget']
feature_cols = categorical_cols + numerical_cols

# --- Data --------------------------------------------------------------------
campaign_data = pd.read_csv('/content/drive/MyDrive/Affinity task 2/campaign_performance.csv')
submission_form = pd.read_csv('/content/drive/MyDrive/Affinity task 2/c2_submission_form.csv')

# The training target is the 'ctr' column.
X_train, y_train = campaign_data[feature_cols], campaign_data['ctr']

# --- Model -------------------------------------------------------------------
categorical_encoder = OneHotEncoder(handle_unknown='ignore')
preprocessor = ColumnTransformer(
    [('cat', categorical_encoder, categorical_cols)],
    remainder='passthrough',
)

# Fixed random_state keeps the forest reproducible across runs.
regressor = RandomForestRegressor(n_estimators=100, random_state=42)
model = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', regressor),
])

# Final model: fitted on every training row and used for the submission below.
model.fit(X_train, y_train)

# --- Evaluation ----------------------------------------------------------------
# cross_val_predict returns, for each training row, a prediction made with
# 5-fold cross-validation (cv=5). The R² printed below is therefore computed
# from out-of-fold predictions on the training set, not from the in-sample fit
# of `model` above.
# NOTE(review): the printed label says "on Training Data"; read it as a
# cross-validated score.
y_train_pred = cross_val_predict(model, X_train, y_train, cv=5)
r2 = r2_score(y_train, y_train_pred)
print(f"R² Score on Training Data: {r2:.4f}")

# --- Predict and export --------------------------------------------------------
X_test = submission_form[feature_cols]
predicted_ctr = model.predict(X_test)
submission_form['predicted_ctr'] = predicted_ctr

submission_form.to_csv('c2_submission_form_with_predictionsr2.csv', index=False)
print("Predicted CTRs saved to 'c2_submission_form_with_predictionsr2.csv'.")


R² Score on Training Data: 0.8183
Predicted CTRs saved to 'c2_submission_form_with_predictionsr2.csv'.
