In [11]:
import pandas as pd
import numpy as np
from datetime import timedelta
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
import pickle

# Load the raw training export and parse the 'date' column into datetimes.
df = pd.read_csv("Train (1).csv")
df['date'] = pd.to_datetime(df['date'])

# Columns that are discarded before modelling.
cols_to_drop = [
    'cost',
    'conversions',
    'currency',
    'call_type',
    'call_status',
    'start_time',
    'duration',
    'end_time',
    'impression_share',
    'display_location',
    'conversions_calls',
]

# Remove those columns, then discard every row that still has a missing value.
df = df.drop(columns=cols_to_drop).dropna()

# Integer-code the 'ad_type' and 'ID' columns, each with its own LabelEncoder.
# The fitted encoders stay bound to module-level names so the same mapping
# can be re-applied with .transform() later on.
ad_type_encoder, label_encoder = LabelEncoder(), LabelEncoder()
for source_col, encoder in (('ad_type', ad_type_encoder), ('ID', label_encoder)):
    df[f'{source_col}_encoded'] = encoder.fit_transform(df[source_col])

# Fixed seed shared by the split and the forest so that the held-out rows,
# the fitted model and the printed score are reproducible between runs.
RANDOM_STATE = 42

# Features are every remaining column except the target ('clicks') and the
# raw columns that have no numeric form here ('date', 'ID', 'ad_type');
# the encoded versions of 'ID' and 'ad_type' stay in.
X = df.drop(['clicks', 'date', 'ID', 'ad_type'], axis=1)
y = df['clicks']

# Convert to NumPy arrays (the model is therefore fitted without column names)
X = X.values
y = y.values

# Hold out 20% of the rows for evaluation.
# NOTE(review): this is a shuffled split across all dates, so the test score
# measures interpolation between observed days rather than forecasting
# accuracy -- consider a chronological split if that is the goal.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

# Train the RandomForestRegressor model
forest = RandomForestRegressor(random_state=RANDOM_STATE)
forest.fit(X_train, y_train)

# Report the coefficient of determination (R^2) on the held-out rows
print("Model Score on Test Data:", forest.score(X_test, y_test))

# Persist two consecutive pickle records in a single file: first the fitted
# model, then the ordered list of feature column names (every column of df
# except the target and the raw, un-encoded columns).
non_feature_cols = {'clicks', 'date', 'ID', 'ad_type'}
with open('saved_model.pkl', 'wb') as f:
    pickle.dump(forest, f)
    pickle.dump([col for col in df.columns if col not in non_feature_cols], f)

# Read the two records back in the order they were written.
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
with open('saved_model.pkl', 'rb') as f:
    loaded_forest, feature_names = pickle.load(f), pickle.load(f)

# Generate future dates for one and two weeks ahead. The clock is read once
# so that the two horizons are exactly seven days apart.
# NOTE(review): in the code visible here these dates are never turned into
# model inputs ('date' is excluded from the features), so both horizons are
# predicted from identical rows and yield identical outputs. Add date-derived
# features to training and to the frames below to make the horizons differ.
forecast_origin = pd.Timestamp.now()
future_date_1_week = forecast_origin + timedelta(days=7)
future_date_2_weeks = forecast_origin + timedelta(days=14)


def _build_future_features(ids, id_encoder, columns, fill_value=0):
    """Return one feature row per ID, laid out like the training matrix.

    Args:
        ids: Raw 'ID' values; each must be known to ``id_encoder``.
        id_encoder: Fitted encoder used to produce the 'ID_encoded' column.
        columns: Ordered feature names the model was trained on.
        fill_value: Placeholder for every feature in ``columns`` that cannot
            be derived from the ID alone.

    Returns:
        A DataFrame whose columns are exactly ``columns``, in that order.
    """
    frame = pd.DataFrame({'ID': ids})
    frame['ID_encoded'] = id_encoder.transform(frame['ID'])
    # reindex keeps only the requested columns, in the requested order, and
    # creates any that are absent filled with the placeholder.
    return frame.reindex(columns=columns, fill_value=fill_value)


# One prediction row per distinct 'ID' present in the cleaned training data
unique_ids = df['ID'].unique()

# NOTE(review): every feature other than 'ID_encoded' is filled with the
# placeholder 0, which is an arbitrary stand-in rather than an observed
# value -- replace it with realistic per-ID inputs before trusting the output.
future_df_1_week = _build_future_features(unique_ids, label_encoder, feature_names)
future_df_2_weeks = _build_future_features(unique_ids, label_encoder, feature_names)

# Debug: Print the shape of the DataFrames to check the number of features
print(f"Shape of training data: {X_train.shape}")
print(f"Shape of future data for 1 week: {future_df_1_week.shape}")
print(f"Shape of future data for 2 weeks: {future_df_2_weeks.shape}")

# Convert future DataFrames to NumPy arrays (the model was fitted on arrays)
future_df_1_week = future_df_1_week.values
future_df_2_weeks = future_df_2_weeks.values

# Make predictions for one and two weeks future dates
future_clicks_pred_1_week = loaded_forest.predict(future_df_1_week)
future_clicks_pred_2_weeks = loaded_forest.predict(future_df_2_weeks)

# Display the predictions
print("\nPredicted clicks for one week ahead:")
print(future_clicks_pred_1_week)

print("\nPredicted clicks for two weeks ahead:")
print(future_clicks_pred_2_weeks)


Model Score on Test Data: 0.9671071561902204
Shape of training data: (231620, 6)
Shape of future data for 1 week: (185, 6)
Shape of future data for 2 weeks: (185, 6)

Predicted clicks for one week ahead:
[0.36106552 0.67266049 0.43       0.77238188 0.66086804 0.72567604
 0.90448476 0.7233613  0.57752283 0.3        0.3        0.20551625
 0.51068389 0.40708061 0.20198959 0.26732831 0.45465597 0.45465597
 0.03466102 0.49492483 0.19806497 0.10910349 0.10224713 0.23983093
 0.10480057 0.26073885 0.25382325 0.20496877 0.66214743 0.07583341
 0.07583341 0.18299807 0.18299807 0.18014182 0.16559927 0.70869475
 0.04       0.46690487 0.27571221 0.23159612 0.37839477 0.47024644
 0.19435437 0.14214121 0.17938193 0.2719581  0.23077621 0.72509423
 0.20910778 0.19336341 0.09       0.16681464 0.43193571 0.13
 0.13       0.45802387 0.34761925 0.31973846 0.51463428 0.32616749
 0.30872264 0.22354277 0.31475167 0.39277757 0.27300655 0.40644745
 0.4670777  0.40714091 0.42593161 0.23975992 0.31818136 0.7487978