In [2]:
import pandas as pd
import numpy as np
from sklearn import metrics
from sklearn.ensemble import RandomForestRegressor

# Train RandomForest regressors that predict 'Weighted_Price' from 'Timestamp',
# keep the forest size with the highest R2 on the test set, and print its
# error metrics.

# Load the training data, the test features and the expected test targets.
train_data = pd.read_csv('/content/hackathon_train_data.csv')
test_data = pd.read_csv('/content/hackathon_test_data.csv')
test_output = pd.read_csv('/content/hackathon_testoutput.csv')

# Only these two columns are used; rows missing either value are dropped.
train_data = train_data[['Timestamp', 'Weighted_Price']].dropna()

# scikit-learn estimators expect a 2-D feature matrix, hence reshape(-1, 1).
X_train = train_data['Timestamp'].values.reshape(-1, 1)
y_train = train_data['Weighted_Price'].values

X_test = test_data['Timestamp'].values.reshape(-1, 1)

# y_test is truncated to the length of X_test. Truncation cannot help when the
# expected-output file is the shorter one, so fail early with a clear message
# instead of an opaque sample-count error from the metric functions.
# NOTE(review): rows are paired purely by position -- confirm both files are
# in the same order.
if len(test_output) < len(X_test):
    raise ValueError(
        'test_output has %d rows but test_data has %d rows; '
        'cannot align targets with features'
        % (len(test_output), len(X_test))
    )
y_test = test_output['Weighted_Price'].values[:len(X_test)]

n_estimators_to_try = [10, 50, 100]  # Candidate numbers of trees

# R2 can be negative, so start below every possible score; starting at 0
# would leave best_model as None whenever all candidates score <= 0.
best_r2 = -np.inf
best_model = None
best_pred = None

# NOTE(review): the candidate is selected on the same test set whose metrics
# are reported below, so the printed scores are optimistic. Consider selecting
# on a validation split of the training data instead.
for n_estimators in n_estimators_to_try:
    # Fixed seed so the selected model and printed metrics are reproducible.
    model = RandomForestRegressor(n_estimators=n_estimators, random_state=0)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    r2 = metrics.r2_score(y_test, y_pred)

    # Always accept the first candidate so a NaN score cannot leave
    # best_model unset.
    if best_model is None or r2 > best_r2:
        best_r2 = r2
        best_model = model
        best_pred = y_pred

# Reuse the predictions of the best model rather than predicting again.
y_pred = best_pred

mse = metrics.mean_squared_error(y_test, y_pred)
print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred))
print('Mean Squared Error:', mse)
print('Root Mean Squared Error:', np.sqrt(mse))
print('Best R2 Score:', best_r2)

Mean Absolute Error: 26.372627925588795
Mean Squared Error: 4810.6525793593855
Root Mean Squared Error: 69.35886806573032
Best R2 Score: 0.9999685500470458
