# 1. Problem Information
- **Name:** [**Daily Average Temperature Prediction**](https://platform.olimpiada-ai.ro/en/problems/20)
- **Date:** 12/02/2026
- **Type:** Regression

# 2. Imports

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.linear_model import SGDRegressor
from sklearn.ensemble import RandomForestRegressor,GradientBoostingRegressor
from sklearn.model_selection import cross_val_score,GridSearchCV,RandomizedSearchCV
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

# 3. Data preparation

In [2]:
# Read both competition splits; the relative paths expect a local data/ folder.
train, test = map(pd.read_csv, ("data/train.csv", "data/test.csv"))
print(train.shape)
train.head()

(500, 9)


Unnamed: 0,SampleID,humidity,wind_speed,pressure,rainfall,cloud_cover,solar_radiation,day_of_year,temperature
0,1,52.472407,20.944851,1005.553988,5.190818,26.170568,679.263245,313,5.33
1,2,87.042858,16.082891,1016.257028,4.791819,24.69788,635.169456,328,3.43
2,3,73.919637,9.285828,1026.188375,0.256421,90.625458,501.470266,7,-1.81
3,4,65.919509,24.413851,1021.966747,3.412478,24.95462,769.233001,206,24.23
4,5,39.361118,20.541935,1024.196834,3.801956,27.194973,240.332161,114,13.53


In [3]:
# Summary statistics of every numeric training column, rounded for readability.
train.describe().round(decimals=3)

Unnamed: 0,SampleID,humidity,wind_speed,pressure,rainfall,cloud_cover,solar_radiation,day_of_year,temperature
count,500.0,500.0,500.0,500.0,500.0,500.0,500.0,500.0,500.0
mean,250.5,59.914,14.459,1015.527,4.965,49.984,453.477,181.424,8.604
std,144.482,17.921,8.565,8.916,2.87,28.581,207.006,106.136,8.055
min,1.0,30.304,0.139,1000.148,0.032,0.157,100.008,2.0,-10.32
25%,125.75,44.477,6.873,1007.237,2.411,26.811,281.87,85.75,2.45
50%,250.5,60.79,14.155,1016.192,5.089,49.601,451.298,176.5,8.34
75%,375.25,75.367,21.79,1023.32,7.374,74.329,637.534,282.0,14.84
max,500.0,89.578,29.992,1029.982,9.983,99.544,798.475,364.0,29.42


# 4. Models

In [4]:
# Select the model inputs by column name instead of by position, so that a
# reordered or extended CSV cannot silently feed the row identifier or the
# target into the model. On the current 9-column file this yields the same
# seven feature columns as the positional slice did.
X = train.drop(columns=['SampleID', 'temperature'])
Y = train['temperature']

In [5]:
# Scaling + gradient boosting wrapped in one pipeline, so the scaler is re-fit
# on the training part of every cross-validation fold.
pipeline = make_pipeline(StandardScaler(),GradientBoostingRegressor(random_state=0))

# Candidate values; the key prefix is the step name make_pipeline derives from
# the estimator's class name.
params = {
    'gradientboostingregressor__n_estimators':[50,100,150,200,500],
    'gradientboostingregressor__max_depth':[1,2,3,5,6],
}
# n_iter=10 evaluates only 10 of the 25 combinations. random_state pins which
# ones are drawn, so a fresh Restart & Run All reports the same best score and
# parameters instead of a different sample on every run.
grid_search = RandomizedSearchCV(
    pipeline,
    params,
    cv=3,
    n_iter=10,
    scoring="neg_mean_squared_error",
    random_state=0,
)

grid_search.fit(X,Y)

# best_score_ is the mean negated MSE over the folds (closer to 0 is better).
print("Best score:",grid_search.best_score_)
print("Best parmas:",grid_search.best_params_)

Best score: -3.9159770734364407
Best parmas: {'gradientboostingregressor__n_estimators': 150, 'gradientboostingregressor__max_depth': 2}


In [6]:
# Pipeline refit on the full training data with the best sampled parameters.
best_estimator = grid_search.best_estimator_
# Pass the test rows through exactly the feature columns (and column order)
# the model was fitted on, rather than trusting the test file's column
# positions to line up with the training frame.
predictions = best_estimator.predict(test[X.columns])

# 5. Submission

In [7]:
# Two-column submission frame: the test identifiers alongside the predicted
# temperatures, in the original row order of the test file.
submission = test[["SampleID"]].assign(temperature=predictions)

submission.head()

Unnamed: 0,SampleID,temperature
0,1001,1.230186
1,1002,7.316058
2,1003,15.465184
3,1004,-0.723162
4,1005,5.603102


In [8]:
# Write the submission file without the DataFrame index column.
submission.to_csv(path_or_buf="submission.csv", index=False)