In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from xgboost import XGBRegressor


In [3]:
# Absolute path to the input CSV; change it to wherever your copy of temps.csv lives.
file_path = '/content/temps.csv'
# Read the whole file into a DataFrame; every later cell works from `df`.
df = pd.read_csv(filepath_or_buffer=file_path)


In [8]:
# Preview the first five rows as a quick sanity check on columns and values.
# NOTE(review): the output recorded below already has one-hot week_* columns and
# no 'actual' column, so it was captured after the preprocessing cell had rebound
# `df`; a fresh top-to-bottom run displays the raw CSV columns here instead.
df.head(n=5)

Unnamed: 0,year,month,day,temp_2,temp_1,average,friend,week_Fri,week_Mon,week_Sat,week_Sun,week_Thurs,week_Tues,week_Wed
0,2019,1,1,45,45,45.6,29,True,False,False,False,False,False,False
1,2019,1,2,44,45,45.7,61,False,False,True,False,False,False,False
2,2019,1,3,45,44,45.8,56,False,False,False,True,False,False,False
3,2019,1,4,44,41,45.9,53,False,True,False,False,False,False,False
4,2019,1,5,41,40,46.0,41,False,False,False,False,False,True,False


In [10]:
# (rows, columns) of whatever frame is currently bound to `df`.
df.shape

(348, 14)

In [11]:
# Count missing values per column; all zeros means there is nothing to impute
# or drop before modelling.
df.isna().sum()

Unnamed: 0,0
year,0
month,0
day,0
temp_2,0
temp_1,0
average,0
friend,0
week_Fri,0
week_Mon,0
week_Sat,0


In [4]:
# One-hot encode categorical columns (the recorded preview shows the resulting
# week_* indicator columns); numeric columns pass through unchanged.
# NOTE(review): this cell rebinds `df`, so running it a second time without
# reloading the CSV raises KeyError on 'actual'.
df = pd.get_dummies(df)

# Separate the regression target from the predictors.
labels = df['actual']
df = df.drop(columns='actual')

# Keep the predictor column names for later reference.
feature_list = list(df.columns)

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
train_features, test_features, train_labels, test_labels = train_test_split(
    df,
    labels,
    test_size=0.2,
    random_state=42,
)


In [5]:
# Gradient-boosted tree regressor trained with a squared-error objective;
# the fixed random_state keeps repeated runs comparable.
xg_model = XGBRegressor(
    objective='reg:squarederror',
    n_estimators=1000,
    learning_rate=0.05,
    max_depth=6,
    random_state=42,
)
# Fit on the training split only; the test split stays untouched for evaluation.
xg_model.fit(train_features, train_labels)


In [6]:
# Predict the target for the held-out test rows; these are scored in the next cell.
predictions = xg_model.predict(test_features)


In [7]:
# Standard regression metrics on the held-out test split.
mae = mean_absolute_error(test_labels, predictions)
mse = mean_squared_error(test_labels, predictions)
r2 = r2_score(test_labels, predictions)

# Per-sample absolute error and absolute percentage error.
errors = abs(predictions - test_labels)
# Divide by the magnitude of the actual value: with a signed denominator a
# negative actual would produce a negative "percentage error" and silently
# inflate the accuracy figure. Identical to the signed form when all actuals
# are positive.
# NOTE: an actual value of exactly 0 still yields inf here; percentage error
# is undefined for such rows.
mape = 100 * (errors / abs(test_labels))
# "Accuracy" is reported as 100 minus the mean absolute percentage error.
accuracy = 100 - np.mean(mape)

print('Mean Absolute Error:', round(mae, 2))
print('Mean Squared Error:', round(mse, 2))
print('R-squared:', round(r2, 2))
print('Accuracy:', round(accuracy, 2), '%')


Mean Absolute Error: 4.15
Mean Squared Error: 28.42
R-squared: 0.8
Accuracy: 93.44 %
