In [3]:

import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score



In [4]:

df = pd.read_csv('study_hours_score.csv')
df.head()

Unnamed: 0,hours,score
0,5.939322,79.188121
1,7.436704,87.279918
2,6.42487,91.372308
3,5.903949,78.44891
4,4.812893,71.569383


In [8]:
x = df[['hours']]  # feature
y = df['score']    # target

# Train model on all data
model = LinearRegression()


x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
model.fit(x_train, y_train)
y_pred = model.predict(x_test)

# Check coefficients
print('Coefficient (slope):', model.coef_[0])
print('Intercept:', model.intercept_)

#r2 score
print('R^2 Score:', r2_score(y_test, y_pred))

# Make a prediction
hours = 9
pred = model.predict(pd.DataFrame({'hours': [hours]}))[0]
print(f'Predicted score for {hours} hours: {pred:.2f}')

Coefficient (slope): 4.903247319350496
Intercept: 50.626030178169984
R^2 Score: 0.8958409432798161
Predicted score for 9 hours: 94.76


In [10]:
# House size -> price (simple linear regression)
f = pd.read_csv('house_size_price.csv')
f.head()

Unnamed: 0,size_m2,price
0,186.465316,599022.373692
1,211.140468,677753.447334
2,184.285509,619187.225271
3,134.481499,453492.20082
4,74.95929,249648.033801


In [12]:
x = f[['size_m2']] 
y = f['price']    

model_house = LinearRegression()
model_house.fit(x, y)

x_test, x_train, y_test, y_train = train_test_split(x, y, test_size=0.2, random_state=42)
model_house.fit(x_train, y_train)
y_pred = model_house.predict(x_test)
# Check coefficient and intercept
print('House size -> price')
print('Coefficient:', model_house.coef_[0])
print('Intercept:', model_house.intercept_)

#r2 score
print('R^2 Score:', r2_score(y_test, y_pred))
# Predict price for 120 m2
pred_price = model_house.predict(pd.DataFrame({'size_m2':[120]}))[0]
print(f'Predicted price for 120 m2: {pred_price:.2f}')

House size -> price
Coefficient: 2955.551956703326
Intercept: 57836.64469390432
R^2 Score: 0.9870076798470523
Predicted price for 120 m2: 412502.88


In [13]:
# Experience -> salary (simple linear regression)
a = pd.read_csv('salary_experience.csv')
a.head()

Unnamed: 0,experience_years,salary_k
0,6.088383,193.657709
1,7.109371,206.521482
2,4.036323,134.513228
3,9.143522,270.084944
4,8.196867,240.073159


In [None]:
X= a[['experience_years']]  
y = a['salary_k']            

# Train model on all data
model_ex = LinearRegression()
model_ex.fit(X, y)

x_test, x_train, y_test, y_train = train_test_split(X, y, test_size=0.2, random_state=42)
model_ex.fit(x_train, y_train)
y_pred = model_ex.predict(x_test)

# Check coefficient and intercept
print('Experience -> salary (k)')
print('Coefficient:', model_ex.coef_[0])
print('Intercept:', model_ex.intercept_)
#r2 score
print('R^2 Score:', r2_score(y_test, y_pred))

# Predict salary for 7 years experience
pred_salary = model_ex.predict(pd.DataFrame({'experience_years':[7]}))[0]
print(f'Predicted salary (k) for 7 years experience: {pred_salary:.2f}k')

Experience -> salary (k)
Coefficient: 24.95310136088613
Intercept: 30.016066524448632
R^2 Score: 0.9987902808227925
Predicted salary (k) for 7 years experience: 204.69k


In [15]:
# Multi-variable house price dataset
b = pd.read_csv('house_price_multi.csv')
b.head()

Unnamed: 0,size_m2,bedrooms,price_k
0,109.174501,3,300.203745
1,199.39159,2,502.322298
2,91.950944,3,276.684541
3,181.138557,3,517.616616
4,182.531188,1,474.585911


In [16]:
X = b[['size_m2','bedrooms']] 
y = b['price_k']               

model_multi = LinearRegression()
model_multi.fit(X, y)

x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model_multi.fit(x_train, y_train)
y_pred = model_multi.predict(x_test)
# Check coefficients and intercept
print('Multi-feature house price (k)')
print('Coefficients (size_m2, bedrooms):', model_multi.coef_)
print('Intercept:', model_multi.intercept_)

#r2 score
print('R^2 Score:', r2_score(y_test, y_pred))

# Predict price for 85 m2 and 3 bedrooms
pred_multi = model_multi.predict(pd.DataFrame({'size_m2':[85],'bedrooms':[3]}))[0]
print(f'Predicted price (k) for 85 m2, 3 bedrooms: {pred_multi:.2f}k')

Multi-feature house price (k)
Coefficients (size_m2, bedrooms): [ 2.51409078 14.47726574]
Intercept: -0.49931787461480326
R^2 Score: 0.956027573029473
Predicted price (k) for 85 m2, 3 bedrooms: 256.63k


In [18]:
# Advertising spend -> sales (multivariate)
c = pd.read_csv('advertising_sales.csv')
c.head()

Unnamed: 0,TV_spend,radio_spend,social_spend,sales_k
0,249.784924,74.719837,73.151894,78.88301
1,10.787815,79.863146,40.27063,62.290535
2,182.323172,44.580558,41.067914,60.213187
3,69.76424,16.21974,51.516816,48.394849
4,186.352496,94.056403,70.605392,82.031026


In [19]:
X = c[['TV_spend','radio_spend','social_spend']]  
y = c['sales_k']                                  

# Train model on all data
model_adv = LinearRegression()
model_adv.fit(X, y)

x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model_adv.fit(x_train, y_train)
y_pred = model_adv.predict(x_test)

# Check coefficients and intercept
print('Advertising spend -> sales (k)')
print('Coefficients (TV, radio, social):', model_adv.coef_)
print('Intercept:', model_adv.intercept_)
#r2 score
print('R^2 Score:', r2_score(y_test, y_pred))

# Predict sales for TV=160, radio=32, social=18
pred_sales = model_adv.predict(pd.DataFrame({'TV_spend':[160],
                                             'radio_spend':[32],
                                             'social_spend':[18]}))[0]
print(f'Predicted sales (k): {pred_sales:.2f}k')

Advertising spend -> sales (k)
Coefficients (TV, radio, social): [0.0413603  0.29742734 0.19645674]
Intercept: 30.091338049585147
R^2 Score: 0.9563033186641701
Predicted sales (k): 49.76k
