In [None]:
# Common imports
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import warnings
# Install a global "ignore" filter so no warning is shown by later cells.
# NOTE(review): this blanket filter also hides deprecation warnings raised by
# the imported libraries — consider narrowing it to specific categories.
warnings.filterwarnings('ignore')
# Limit DataFrame rendering to at most 20 columns.
pd.options.display.max_columns = 20

In [None]:
# Study hours vs score: load the data for the simple linear regression.
df = pd.read_csv(filepath_or_buffer='study_hours_score.csv')
# Show the first five rows as the cell output.
df.head(n=5)

Unnamed: 0,hours,score
0,5.939322,79.188121
1,7.436704,87.279918
2,6.424870,91.372308
3,5.903949,78.448910
4,4.812893,71.569383
...,...,...
995,1.879087,59.347150
996,5.634300,69.185676
997,9.445708,93.217275
998,3.057819,66.255163


In [None]:
# Feature frame (the list selector keeps it 2-D) and target series.
X, y = df[['hours']], df['score']

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# fit() returns the estimator itself, so construction and training are chained.
model = LinearRegression().fit(X_train, y_train)

# Evaluate on the held-out rows only.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

# Report the fitted line and the test-set metrics.
print('Study hours -> score')
print('Coefficient (slope):', model.coef_[0])
print('Intercept:', model.intercept_)
print(f'RMSE: {rmse:.3f}, R2: {r2:.3f}')

# Example prediction: a one-row frame with the training column name.
hours = 9
hours_query = pd.DataFrame({'hours': [hours]})
pred = model.predict(hours_query)[0]
print(f'Predicted score for {hours} hours: {pred:.2f}')

[4.94609019]
50.439746059106994
0.877700309825362


array([94.95455775])

In [None]:
# House size -> price: load the data for the simple linear regression.
f = pd.read_csv(filepath_or_buffer='house_size_price.csv')
# Show the first five rows as the cell output.
f.head(n=5)

Unnamed: 0,size_m2,price
0,186.465316,599022.373692
1,211.140468,677753.447334
2,184.285509,619187.225271
3,134.481499,453492.200820
4,74.959290,249648.033801
...,...,...
995,120.301916,417630.829078
996,108.798251,397790.599625
997,234.901042,786611.210013
998,120.214747,434795.293273


In [None]:
# Single-feature frame (size) and target series (price).
X, y = f[['size_m2']], f['price']

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# fit() returns the estimator itself, so construction and training are chained.
model_house = LinearRegression().fit(X_train, y_train)

# Evaluate on the held-out rows only.
y_pred = model_house.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

# Report the fitted line and the test-set metrics.
print('House size -> price')
print('Coefficient:', model_house.coef_[0])
print('Intercept:', model_house.intercept_)
print(f'RMSE: {rmse:.2f}, R2: {r2:.3f}')

# Example prediction for 120 m2, passed as a one-row frame with the training column name.
size_query = pd.DataFrame({'size_m2': [120]})
pred_price = model_house.predict(size_query)[0]
print(f'Predicted price for 120 m2: {pred_price:.2f}')

array([409393.71142136])

In [None]:
# Experience -> salary: load the data for the simple linear regression.
a = pd.read_csv(filepath_or_buffer='salary_experience.csv')
# Show the first five rows as the cell output.
a.head(n=5)

Unnamed: 0,experience_years,salary_k
0,6.088383,193.657709
1,7.109371,206.521482
2,4.036323,134.513228
3,9.143522,270.084944
4,8.196867,240.073159
...,...,...
995,17.145433,453.254885
996,19.385876,507.022031
997,17.724494,473.990924
998,1.652201,75.688656


In [None]:
# Single-feature frame (experience) and target series (salary_k).
X, y = a[['experience_years']], a['salary_k']

# Hold out 25% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=1
)

# fit() returns the estimator itself, so construction and training are chained.
model_ex = LinearRegression().fit(X_train, y_train)

# Evaluate on the held-out rows only.
y_pred = model_ex.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

# Report the fitted line and the test-set metrics.
print('Experience -> salary (k)')
print('Coefficient:', model_ex.coef_[0])
print('Intercept:', model_ex.intercept_)
print(f'RMSE: {rmse:.2f}, R2: {r2:.3f}')

# Prediction for 7 years, passed as a one-row frame with the training column name.
experience_query = pd.DataFrame({'experience_years': [7]})
pred_salary = model_ex.predict(experience_query)[0]
print(f'Predicted salary (k) for 7 years experience: {pred_salary:.2f}k')

array([205.11816509])

In [None]:
# Multi-variable house price dataset: load it for the multi-feature regression.
b = pd.read_csv(filepath_or_buffer='house_price_multi.csv')
# Show the first five rows as the cell output.
b.head(n=5)

Unnamed: 0,size_m2,bedrooms,price_k
0,109.174501,3,300.203745
1,199.391590,2,502.322298
2,91.950944,3,276.684541
3,181.138557,3,517.616616
4,182.531188,1,474.585911
...,...,...,...
995,136.398887,4,388.236189
996,189.146038,1,485.508485
997,137.234862,3,383.451102
998,188.753203,3,482.486889


In [None]:
# Two-feature frame (size and bedroom count) and target series (price_k).
X, y = b[['size_m2', 'bedrooms']], b['price_k']

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=7
)

# fit() returns the estimator itself, so construction and training are chained.
model_multi = LinearRegression().fit(X_train, y_train)

# Evaluate on the held-out rows only.
y_pred = model_multi.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

# Report the fitted coefficients (same order as the feature columns) and the metrics.
print('Multi-feature house price (k)')
print('Coefficients (size_m2, bedrooms):', model_multi.coef_)
print('Intercept:', model_multi.intercept_)
print(f'RMSE: {rmse:.3f}, R2: {r2:.3f}')

# Prediction for 85 m2 and 3 bedrooms; column names and order match the training frame.
house_query = pd.DataFrame({'size_m2': [85], 'bedrooms': [3]})
pred_multi = model_multi.predict(house_query)[0]
print(f'Predicted price (k) for 85 m2, 3 bedrooms: {pred_multi:.2f}k')

[ 2.48493486 14.75492814]
2.4942087107460225
0.9626421569144518


array([257.97845613])

In [None]:
# Advertising spend -> sales: load the data for the multivariate regression.
c = pd.read_csv(filepath_or_buffer='advertising_sales.csv')
# Show the first five rows as the cell output.
c.head(n=5)

Unnamed: 0,TV_spend,radio_spend,social_spend,sales_k
0,249.784924,74.719837,73.151894,78.883010
1,10.787815,79.863146,40.270630,62.290535
2,182.323172,44.580558,41.067914,60.213187
3,69.764240,16.219740,51.516816,48.394849
4,186.352496,94.056403,70.605392,82.031026
...,...,...,...,...
995,62.343337,91.825738,53.125258,72.976675
996,72.385360,49.941945,8.673928,48.687981
997,113.622860,61.524799,56.715654,68.961509
998,96.848315,88.416795,44.069844,69.134915


In [None]:
# Three spend columns as features and sales_k as the target series.
X, y = c[['TV_spend', 'radio_spend', 'social_spend']], c['sales_k']

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=11
)

# fit() returns the estimator itself, so construction and training are chained.
model_adv = LinearRegression().fit(X_train, y_train)

# Evaluate on the held-out rows only.
y_pred = model_adv.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

# Report the fitted coefficients (same order as the feature columns) and the metrics.
print('Advertising spend -> sales (k)')
print('Coefficients (TV, radio, social):', model_adv.coef_)
print('Intercept:', model_adv.intercept_)
print(f'RMSE: {rmse:.3f}, R2: {r2:.3f}')

# Predict when TV=160, radio=32, social=18; column names and order match the training frame.
spend_query = pd.DataFrame(
    {'TV_spend': [160], 'radio_spend': [32], 'social_spend': [18]}
)
pred_sales = model_adv.predict(spend_query)[0]
print(f'Predicted sales (k): {pred_sales:.2f}k')

[0.0414836  0.29588444 0.19820709]
30.058753929007658
0.9581514541240409


array([49.73215881])