In [2]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_absolute_error

# Create Dataset
# Each record describes one car, in the order:
# (Engine_Size, Mileage, Age, Horsepower, Price).
_columns = ("Engine_Size", "Mileage", "Age", "Horsepower", "Price")
_records = [
    (1.2, 90, 8, 80, 3.5),
    (1.5, 70, 6, 95, 5),
    (1.8, 60, 5, 110, 6),
    (2.0, 50, 4, 130, 8),
    (2.2, 40, 3, 150, 10),
    (1.3, 85, 7, 85, 4),
    (1.6, 65, 6, 100, 5.5),
    (2.4, 30, 2, 180, 14),
    (2.0, 45, 4, 140, 9),
    (1.4, 80, 7, 90, 4.5),
    (1.7, 55, 5, 115, 6.5),
    (2.5, 25, 1, 200, 16),
    (1.8, 50, 3, 125, 8.5),
    (2.2, 35, 2, 160, 12),
    (1.5, 75, 6, 105, 5.2),
]

# Transpose the row-wise records into a column-name -> list-of-values dict,
# which is the layout the DataFrame constructor is given below.
data = {name: list(values) for name, values in zip(_columns, zip(*_records))}

df = pd.DataFrame(data)

# Preview the first five rows as plain text.
print(df.head())

# Basic EDA
# Descriptive statistics for every column, printed under a heading.
# Passing sep="\n" puts the heading and the table on separate lines, exactly
# as two consecutive print() calls would.
summary_stats = df.describe()
print("\nSummary Statistics:", summary_stats, sep="\n")

# Correlation
# Pairwise correlation between all columns (pandas' default method).
corr_matrix = df.corr()
print("\nCorrelation Matrix:", corr_matrix, sep="\n")

# Features and Target
# Predictor columns, in the order the model will see them; Price is the target.
feature_columns = ['Engine_Size', 'Mileage', 'Age', 'Horsepower']
X = df[feature_columns]
y = df['Price']

# Train Test Split
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# shuffle (and therefore the split) reproducible from run to run.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

# Model Training
# fit() returns the estimator itself, so construction and fitting can be chained.
model = LinearRegression().fit(X_train, y_train)

# Report the learned parameters: the intercept first, then one coefficient per
# feature column of X_train.
for label, value in (
    ("\nIntercept:", model.intercept_),
    ("Coefficients:", model.coef_),
):
    print(label, value)

# Prediction
# Predicted prices for the held-out test rows.
y_pred = model.predict(X_test)

# Evaluation
# NOTE(review): with test_size=0.2 on a 15-row dataset the test split holds only
# 3 rows, so both metrics are very noisy estimates - confirm whether that is
# acceptable or whether cross-validation would be more appropriate.
r2_value = r2_score(y_test, y_pred)
mae_value = mean_absolute_error(y_test, y_pred)

print("\nR2 Score:", r2_value)
print("MAE:", mae_value)

# Predict New Car Price
# A single unseen car, expressed as a one-row frame whose column names and
# order match the features the model was trained on.
new_car = pd.DataFrame([
    {"Engine_Size": 2.0, "Mileage": 40, "Age": 3, "Horsepower": 150},
])

# predict() returns one value per input row; take the only one and round it
# to two decimals for display.
predicted_price = model.predict(new_car)[0]
print("\nPredicted Price:", round(predicted_price, 2), "Lakhs")



   Engine_Size  Mileage  Age  Horsepower  Price
0          1.2       90    8          80    3.5
1          1.5       70    6          95    5.0
2          1.8       60    5         110    6.0
3          2.0       50    4         130    8.0
4          2.2       40    3         150   10.0

Summary Statistics:
       Engine_Size    Mileage        Age  Horsepower      Price
count    15.000000  15.000000  15.000000   15.000000  15.000000
mean      1.806667  57.000000   4.600000  124.333333   7.846667
std       0.400832  20.248457   2.097618   35.700474   3.767695
min       1.200000  25.000000   1.000000   80.000000   3.500000
25%       1.500000  42.500000   3.000000   97.500000   5.100000
50%       1.800000  55.000000   5.000000  115.000000   6.500000
75%       2.100000  72.500000   6.000000  145.000000   9.500000
max       2.500000  90.000000   8.000000  200.000000  16.000000

Correlation Matrix:
             Engine_Size   Mileage       Age  Horsepower     Price
Engine_Size     1.000000 -0