In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv('room_temp_data.csv')
df.head()

Unnamed: 0,time_of_day,outside_temp,num_of_people_in_room,fan_on,ac_on,window_open,room_size,room_temp
0,Evening,25.8,5,1,1,1,Medium,23.5
1,Night,22.4,5,1,1,0,Small,19.5
2,Morning,27.8,2,0,0,0,Large,33.4
3,Evening,34.3,3,0,0,1,Medium,34.2
4,Evening,31.1,3,1,0,0,Large,33.9


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 8 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   time_of_day            500 non-null    object 
 1   outside_temp           500 non-null    float64
 2   num_of_people_in_room  500 non-null    int64  
 3   fan_on                 500 non-null    int64  
 4   ac_on                  500 non-null    int64  
 5   window_open            500 non-null    int64  
 6   room_size              500 non-null    object 
 7   room_temp              500 non-null    float64
dtypes: float64(2), int64(4), object(2)
memory usage: 31.4+ KB


In [4]:
df.isnull().sum()

time_of_day              0
outside_temp             0
num_of_people_in_room    0
fan_on                   0
ac_on                    0
window_open              0
room_size                0
room_temp                0
dtype: int64

In [5]:
import matplotlib.pyplot as plt
%matplotlib inline

In [6]:
X = df[['time_of_day', 'outside_temp', 'num_of_people_in_room', 'fan_on', 'ac_on', 'window_open', 'room_size']]

In [7]:
y = df[['room_temp']]

In [8]:
X.head()

Unnamed: 0,time_of_day,outside_temp,num_of_people_in_room,fan_on,ac_on,window_open,room_size
0,Evening,25.8,5,1,1,1,Medium
1,Night,22.4,5,1,1,0,Small
2,Morning,27.8,2,0,0,0,Large
3,Evening,34.3,3,0,0,1,Medium
4,Evening,31.1,3,1,0,0,Large


In [9]:
y.head()

Unnamed: 0,room_temp
0,23.5
1,19.5
2,33.4
3,34.2
4,33.9


In [10]:
df_encoded = pd.get_dummies(df, columns=['time_of_day', 'room_size'] ,drop_first=True)


In [11]:
X = df_encoded.drop('room_temp', axis=1)
y = df_encoded['room_temp']

In [12]:
X.head()

Unnamed: 0,outside_temp,num_of_people_in_room,fan_on,ac_on,window_open,time_of_day_Evening,time_of_day_Morning,time_of_day_Night,room_size_Medium,room_size_Small
0,25.8,5,1,1,1,True,False,False,True,False
1,22.4,5,1,1,0,False,False,True,False,True
2,27.8,2,0,0,0,False,True,False,False,False
3,34.3,3,0,0,1,True,False,False,True,False
4,31.1,3,1,0,0,True,False,False,False,False


In [13]:
X.shape

(500, 10)

In [14]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)


In [15]:
X_train.shape

(375, 10)

In [16]:
from sklearn.preprocessing import StandardScaler
scalar = StandardScaler()


In [17]:
X_train = scalar.fit_transform(X_train)
X_test = scalar.transform(X_test)

In [18]:
X_train

array([[-0.72079671, -1.45236875, -1.62504668, ..., -0.68179451,
         1.19794196, -0.79399923],
       [ 1.81585622, -0.84721511,  0.61536694, ..., -0.68179451,
         1.19794196, -0.79399923],
       [ 0.47975659, -1.45236875, -1.62504668, ..., -0.68179451,
         1.19794196, -0.79399923],
       ...,
       [-0.83697928,  0.96824584,  0.61536694, ...,  1.4667176 ,
        -0.83476498, -0.79399923],
       [ 1.29303463, -1.45236875, -1.62504668, ...,  1.4667176 ,
         1.19794196, -0.79399923],
       [-0.19797511,  0.36309219,  0.61536694, ..., -0.68179451,
         1.19794196, -0.79399923]], shape=(375, 10))

In [19]:
from sklearn.linear_model import RidgeCV
ridge = RidgeCV()



In [20]:
ridge.fit(X_train, y_train)

In [21]:
y_pred = ridge.predict(X_test)

In [22]:
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
score = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
print(f"r2 score: {score}")
print(f"mae: {mae}")
print(f"rmse :{rmse}")

r2 score: 0.9670125835039431
mae: 0.8570372083828054
rmse :1.07560687678777


In [23]:
new_input = {
    'time_of_day': 'Morning',
    'outside_temp': 32,
    'num_of_people_in_room': 2,
    'fan_on': 1,
    'ac_on': 0,
    'window_open': 1,
    'room_size': 'Small'
}
df_input = pd.DataFrame([new_input])

In [24]:
df_input = pd.get_dummies(df_input)


In [25]:
missing_cols = set(X.columns) - set(df_input.columns)
for col in missing_cols:
    df_input[col] = 0

In [26]:
df_input = df_input[X.columns]

In [27]:
X_new = scalar.transform(df_input)
prediction = ridge.predict(X_new)

In [28]:
print(round(prediction[0],2))

29.11


In [29]:
import pickle

pickle.dump(scalar, open('scaler.pkl', 'wb'))
pickle.dump(ridge, open('ridge.pkl', 'wb'))
pickle.dump(X.columns,open('featureColumns.pkl', 'wb') )

In [30]:
model = pickle.load(open('ridge.pkl', 'rb'))

In [31]:
model.predict(X_new).round(2)

array([29.11])

In [32]:
coefficients = pd.DataFrame({
    'Feature': X.columns,
    'Coefficient': ridge.coef_[0]
})
print(coefficients.sort_values('Coefficient', ascending=False))

                 Feature  Coefficient
0           outside_temp     5.245087
1  num_of_people_in_room     5.245087
2                 fan_on     5.245087
3                  ac_on     5.245087
4            window_open     5.245087
5    time_of_day_Evening     5.245087
6    time_of_day_Morning     5.245087
7      time_of_day_Night     5.245087
8       room_size_Medium     5.245087
9        room_size_Small     5.245087
