In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

In [6]:
# Read the raw listings from the CSV in the working directory.
data = pd.read_csv(filepath_or_buffer='train.csv')
# Preview the first five rows to sanity-check the columns that were parsed.
data.head(n=5)

Unnamed: 0.1,Unnamed: 0,Name,Location,Year,Kilometers_Driven,Fuel_Type,Transmission,Owner_Type,Mileage,Engine,Power,Seats,New_Price,Price
0,1,Hyundai Creta 1.6 CRDi SX Option,Pune,2015,41000,Diesel,Manual,First,19.67 kmpl,1582 CC,126.2 bhp,5.0,,12.5
1,2,Honda Jazz V,Chennai,2011,46000,Petrol,Manual,First,13 km/kg,1199 CC,88.7 bhp,5.0,8.61 Lakh,4.5
2,3,Maruti Ertiga VDI,Chennai,2012,87000,Diesel,Manual,First,20.77 kmpl,1248 CC,88.76 bhp,7.0,,6.0
3,4,Audi A4 New 2.0 TDI Multitronic,Coimbatore,2013,40670,Diesel,Automatic,Second,15.2 kmpl,1968 CC,140.8 bhp,5.0,,17.74
4,6,Nissan Micra Diesel XV,Jaipur,2013,86999,Diesel,Manual,First,23.08 kmpl,1461 CC,63.1 bhp,5.0,,3.5


In [11]:
# Column the regression is asked to predict.
target = 'Price'

# Predictor columns fed to the model; the last three are categorical and are
# one-hot encoded in a later cell.
selected_features = [
    'Year',
    'Kilometers_Driven',
    'Fuel_Type',
    'Transmission',
    'Owner_Type',
]


In [12]:
# Keep only the modelling columns (predictors + target) and discard every row
# that has a missing value in any of them.
data_selected = data.loc[:, [*selected_features, target]].dropna()

# One-hot encode the categorical predictors. drop_first=True removes one
# level per category so the dummy columns are not perfectly collinear.
data_encoded = pd.get_dummies(
    data=data_selected,
    columns=['Fuel_Type', 'Transmission', 'Owner_Type'],
    drop_first=True,
)


In [13]:
# Separate the response from the design matrix.
y = data_encoded[target]
X = data_encoded.drop(columns=[target])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [14]:
# Ordinary least-squares regression with scikit-learn's default settings,
# fitted on the training portion only.
model = LinearRegression()
model.fit(X=X_train, y=y_train)


In [15]:
# Fitted constant term of the regression.
intercept = model.intercept_

# Pair every design-matrix column with its fitted slope so the table can be
# read (and searched) by feature name.
coefficients = pd.DataFrame(
    data={
        'Feature': X.columns,
        'Coefficient': model.coef_,
    }
)


In [16]:
# Score the model on the held-out rows.
y_pred = model.predict(X_test)

# Both metrics compare the held-out targets with the predictions above.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)


In [17]:
# Emit the whole report with a single print call: sep="\n" puts each piece on
# its own line, which yields the same text as one print() per piece.
print(
    "✅ Task Completed!\n",
    "📊 Feature Coefficients Table:",
    coefficients,
    f"\n📈 Intercept: {intercept:.2f}",
    f"\n🎯 Model Performance:\n- R² Score: {r2:.2f}\n- Mean Squared Error (MSE): {mse:.2f}",
    sep="\n",
)


✅ Task Completed!

📊 Feature Coefficients Table:
                     Feature  Coefficient
0                       Year     0.817778
1          Kilometers_Driven     0.000003
2         Fuel_Type_Electric    -8.509375
3           Fuel_Type_Petrol    -5.086182
4        Transmission_Manual   -13.270467
5  Owner_Type_Fourth & Above     0.525758
6          Owner_Type_Second    -0.069553
7           Owner_Type_Third    -0.071522

📈 Intercept: -1625.37

🎯 Model Performance:
- R² Score: 0.45
- Mean Squared Error (MSE): 64.25


In [18]:
# A raw coefficient is the price change per ONE UNIT of its feature, so its
# magnitude depends on the feature's units (a year, a kilometre, a 0/1 dummy).
# Ranking raw magnitudes therefore compares incomparable quantities. Scale each
# |coefficient| by the training-set standard deviation of its feature so the
# ranking reflects the effect of a typical change in that feature instead.
feature_std = X_train.astype(float).std()  # cast so bool/uint8 dummies are handled like numeric columns
scaled_effect = coefficients['Coefficient'].abs() * coefficients['Feature'].map(feature_std)

# Same row layout as before (Feature, Coefficient); only the selection
# criterion changed. The reported number is still the raw coefficient.
largest_impact_feature = coefficients.loc[scaled_effect.idxmax()]
print(f"\n🔹 Feature with Largest Impact: {largest_impact_feature['Feature']} ({largest_impact_feature['Coefficient']:.2f})")



🔹 Feature with Largest Impact: Transmission_Manual (-13.27)
