# In [3]:
import pandas as pd
import numpy as np
from datetime import timedelta
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
import pickle

# Columns that are removed before modelling
cols_to_drop = [
    'cost', 'conversions', 'currency', 'call_type', 'call_status',
    'start_time', 'duration', 'end_time', 'impression_share',
    'display_location', 'conversions_calls',
]

# Load the raw training data from disk
df = pd.read_csv("Train (1).csv")

# Parse the 'date' column into pandas datetime values
df['date'] = pd.to_datetime(df['date'])

# Remove the unused columns, then discard every row that still contains
# a missing value in any of the remaining columns
df = df.drop(columns=cols_to_drop)
df.dropna(inplace=True)

# Map each distinct 'ad_type' value to an integer code. The fitted encoder
# is kept in a module-level name so the same mapping can be reused later.
ad_type_encoder = LabelEncoder()
ad_type_encoder.fit(df['ad_type'])
df['ad_type_encoded'] = ad_type_encoder.transform(df['ad_type'])

# Map each distinct 'ID' value to an integer code in the same way
label_encoder = LabelEncoder()
label_encoder.fit(df['ID'])
df['ID_encoded'] = label_encoder.transform(df['ID'])

# Everything except the target and the raw (un-encoded / date) columns is a
# model input; 'clicks' is the prediction target. Both are handed to
# scikit-learn as plain NumPy arrays.
non_feature_cols = ['clicks', 'date', 'ID', 'ad_type']
X = df.drop(columns=non_feature_cols).values
y = df['clicks'].values

# Seed shared by the split and the forest so that the held-out rows, the
# fitted model and the printed score are reproducible from run to run.
RANDOM_STATE = 42

# Hold out 20% of the rows for evaluation.
# NOTE(review): train_test_split shuffles rows by default; if this data is
# meant to be treated as a time series, a chronological split would give a
# more honest estimate -- confirm the intended evaluation protocol.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

# Fit a random forest with default hyper-parameters on the training rows
forest = RandomForestRegressor(random_state=RANDOM_STATE)
forest.fit(X_train, y_train)

# RandomForestRegressor.score reports the R^2 of the predictions on the
# held-out rows
print("Model Score on Test Data:", forest.score(X_test, y_test))

# Persist the fitted model followed by the ordered list of feature columns.
# The two objects are written back-to-back into one pickle stream, so they
# have to be read back in exactly the same order.
feature_columns = df.columns.drop(['clicks', 'date', 'ID', 'ad_type']).tolist()
with open('saved_model.pkl', 'wb') as model_file:
    for payload in (forest, feature_columns):
        pickle.dump(payload, model_file)

# Read both objects back from disk (model first, then the column list).
# NOTE: unpickling can execute arbitrary code -- only load pickle files that
# come from a trusted source such as the one written just above.
with open('saved_model.pkl', 'rb') as model_file:
    loaded_forest = pickle.load(model_file)
    feature_names = pickle.load(model_file)

def _build_future_features(ids, encoder, columns):
    """Return a prediction frame with one row per ID, laid out like the training set.

    Args:
        ids: Raw 'ID' values to predict for.
        encoder: Fitted LabelEncoder used to produce 'ID_encoded'.
        columns: Ordered feature column names the model was trained on.

    Returns:
        A DataFrame whose columns are ``columns`` (in that order) followed by
        'ID'. Any feature column other than 'ID_encoded' is filled with 0.
    """
    frame = pd.DataFrame({'ID': ids})
    frame['ID_encoded'] = encoder.transform(frame['ID'])
    # reindex both adds the training columns that are absent (filled with 0)
    # and puts every column in training order in a single step
    return frame.reindex(columns=list(columns) + ['ID'], fill_value=0)


# Read the clock once so both horizons are measured from the same instant
forecast_origin = pd.Timestamp.now()
future_date_1_week = forecast_origin + timedelta(days=7)
future_date_2_weeks = forecast_origin + timedelta(days=14)
# NOTE(review): these dates are never passed to the model ('date' is dropped
# from the training features), and every feature except 'ID_encoded' is
# zero-filled below. As a result the 1-week and 2-week predictions are
# identical. Making the forecast horizon-aware requires date-derived
# features at training time and realistic values for the other inputs.

# One prediction row per distinct ID seen in the training data
unique_ids = df['ID'].unique()

# Build the (currently identical) feature frames for both horizons
future_df_1_week = _build_future_features(unique_ids, label_encoder, feature_names)
future_df_2_weeks = _build_future_features(unique_ids, label_encoder, feature_names)

# Keep the raw IDs aside so they can be attached to the predictions
future_ids_1_week = future_df_1_week['ID']
future_ids_2_weeks = future_df_2_weeks['ID']

# The model was fitted on a plain array without the raw 'ID' column
future_df_1_week = future_df_1_week.drop(columns='ID').values
future_df_2_weeks = future_df_2_weeks.drop(columns='ID').values

# Predict clicks for each horizon with the reloaded model
future_clicks_pred_1_week = loaded_forest.predict(future_df_1_week)
future_clicks_pred_2_weeks = loaded_forest.predict(future_df_2_weeks)

# Pair every prediction with the ID it belongs to
predictions_1_week = pd.DataFrame({
    'ID': future_ids_1_week,
    'Predicted_Clicks_1_Week': future_clicks_pred_1_week,
})
predictions_2_weeks = pd.DataFrame({
    'ID': future_ids_2_weeks,
    'Predicted_Clicks_2_Weeks': future_clicks_pred_2_weeks,
})

# Clicks are counts, so report whole numbers
predictions_1_week['Predicted_Clicks_1_Week'] = (
    predictions_1_week['Predicted_Clicks_1_Week'].round().astype(int)
)
predictions_2_weeks['Predicted_Clicks_2_Weeks'] = (
    predictions_2_weeks['Predicted_Clicks_2_Weeks'].round().astype(int)
)

# Write one Excel workbook per horizon (without the DataFrame index)
predictions_1_week.to_excel('predictions_1_week.xlsx', index=False)
predictions_2_weeks.to_excel('predictions_2_weeks.xlsx', index=False)


print("Predictions saved to Excel files.")


# Output:
# Model Score on Test Data: 0.9640915672094031
# Combined predictions saved to Excel file.
# Predictions saved to Excel files.
