In [4]:
# NSS AARTHI
# !pip install fbprophet

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestRegressor
# from fbprophet import Prophet # Now this import should work
from keras.models import Sequential
from keras.layers import LSTM, Dense

# Load data
train_data = pd.read_csv('/content/train.csv')
test_data = pd.read_csv('/content/test.csv')

In [5]:
# Convert dates
train_data['date'] = pd.to_datetime(train_data['date'])
test_data['date'] = pd.to_datetime(test_data['date'])

# Fill missing values
train_data.fillna({'ad_spend': train_data['ad_spend'].median()}, inplace=True)
test_data.fillna({'ad_spend': test_data['ad_spend'].median()}, inplace=True)

#Feature engineering
train_data['day_of_week'] = train_data['date'].dt.dayofweek
train_data['month'] = train_data['date'].dt.month
train_data['lag_1'] = train_data['units'].shift(1)
train_data.dropna(inplace=True)


In [6]:
X = train_data[['ad_spend', 'day_of_week', 'month', 'lag_1']]
y = train_data['units']

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)
rf_predictions = rf_model.predict(X_val)
rf_mse = mean_squared_error(y_val, rf_predictions)
print(f'Random Forest MSE: {rf_mse}')


Random Forest MSE: 3239.5734415056418


In [12]:
test_data['date'] = pd.to_datetime(test_data['date'])
test_data.sort_values('date', inplace=True)

# Extract day of the week and month from the 'date' column if necessary
test_data['day_of_week'] = test_data['date'].dt.dayofweek
test_data['month'] = test_data['date'].dt.month


test_data['lag_1'] = test_data['ad_spend'].shift(1).fillna(0)  # Using ad_spend for lag creation

# Select the same features used in the model training
test_features = test_data[['ad_spend', 'day_of_week', 'month', 'lag_1']]


In [13]:
# Make predictions
predictions = rf_model.predict(test_features)


In [14]:
# Assuming the test_data has an 'ID' column as in your provided sample
test_data['TARGET'] = predictions

# Create a DataFrame for submission
submission = test_data[['ID', 'TARGET']]

# Save the submission to a CSV file
submission.to_csv('/content/submissions.csv', index=False)


In [8]:
X_train_lstm = np.reshape(X_train.values, (X_train.shape[0], 1, X_train.shape[1]))
X_val_lstm = np.reshape(X_val.values, (X_val.shape[0], 1, X_val.shape[1]))

lstm_model = Sequential([
    LSTM(50, input_shape=(1, X_train.shape[1])),
    Dense(1)
])
lstm_model.compile(optimizer='adam', loss='mean_squared_error')
lstm_model.fit(X_train_lstm, y_train, epochs=20, batch_size=1, verbose=2)

lstm_predictions = lstm_model.predict(X_val_lstm)
lstm_mse = mean_squared_error(y_val, [x[0] for x in lstm_predictions])
print(f'LSTM MSE: {lstm_mse}')


  super().__init__(**kwargs)


Epoch 1/20
59628/59628 - 117s - 2ms/step - loss: 4094.8955
Epoch 2/20
59628/59628 - 114s - 2ms/step - loss: 4072.0369
Epoch 3/20
59628/59628 - 139s - 2ms/step - loss: 4052.3289
Epoch 4/20
59628/59628 - 146s - 2ms/step - loss: 4031.1575
Epoch 5/20
59628/59628 - 117s - 2ms/step - loss: 3992.2595
Epoch 6/20
59628/59628 - 114s - 2ms/step - loss: 3971.3132
Epoch 7/20
59628/59628 - 142s - 2ms/step - loss: 3970.1990
Epoch 8/20
59628/59628 - 114s - 2ms/step - loss: 3987.0161
Epoch 9/20
59628/59628 - 149s - 2ms/step - loss: 3984.0959
Epoch 10/20
59628/59628 - 113s - 2ms/step - loss: 3948.0845
Epoch 11/20
59628/59628 - 146s - 2ms/step - loss: 3939.2422
Epoch 12/20
59628/59628 - 114s - 2ms/step - loss: 3925.4709
Epoch 13/20
59628/59628 - 144s - 2ms/step - loss: 3955.0356
Epoch 14/20
59628/59628 - 140s - 2ms/step - loss: 3957.1453
Epoch 15/20
59628/59628 - 113s - 2ms/step - loss: 3920.3359
Epoch 16/20
59628/59628 - 151s - 3ms/step - loss: 3920.9014
Epoch 17/20
59628/59628 - 141s - 2ms/step - loss:

In [15]:

test_features_lstm = np.reshape(test_features.values, (test_features.shape[0], 1, test_features.shape[1]))


In [16]:
# predictions using the trained LSTM model
test_predictions_lstm = lstm_model.predict(test_features_lstm)
#Flatten the predictions to fit the submission format
test_predictions_lstm = test_predictions_lstm.flatten()


[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step


In [None]:
# Assuming the test_data has an 'ID' column as in your provided sample
test_data['TARGET'] = test_predictions_lstm

# Create a DataFrame for submission
submission_lstm = test_data[['ID', 'TARGET']]

# Save the submission to a CSV file
submission_lstm.to_csv('/content/lstm_submissions.csv', index=False)
