In [None]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import LabelEncoder, StandardScaler, PolynomialFeatures

In [None]:

# Read the raw CSV into a DataFrame; `df` itself is left unmodified by this cell.
df = pd.read_csv("TV_Final.csv")

# The selling price is the regression target (y);
# every remaining column goes into the feature frame (X).
y = df['Selling Price(USD)']
X = df.drop('Selling Price(USD)', axis=1)

In [None]:
# Apply Label Encoding to categorical variables.
# The categorical columns are detected on, and the encoders fitted from, the
# untouched columns of `df` rather than `X`: this cell overwrites `X` in place,
# so if it were driven by `X` a second run would find no object columns left
# and `label_encoders` would silently end up empty.
label_encoders = {}
categorical_columns = df[X.columns].select_dtypes(include=['object']).columns
for column in categorical_columns:
    le = LabelEncoder()
    X[column] = le.fit_transform(df[column])
    # Keep the fitted encoder per column so the same string -> integer mapping
    # can be reused after this cell.
    label_encoders[column] = le

# Adding Polynomial Features
# degree=2 expands the features with squared terms and pairwise products;
# include_bias=False leaves out the constant column.
poly = PolynomialFeatures(degree=2, include_bias=False)
X_poly = poly.fit_transform(X)

# Split the data into training and testing sets
# 20% of the rows are held out; random_state pins the shuffle for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X_poly, y, test_size=0.2, random_state=42)

In [None]:
# Feature scaling
# The scaler is fitted on the training split only and then applied to the test
# split, so test-set statistics do not influence the fit.
# NOTE: X_train / X_test are overwritten here; re-running this cell without
# re-running the split refits `scaler` on already-scaled data.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train the linear regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
# "Accuracy" below is just R-squared expressed as a percentage; it is not a
# classification accuracy and can be negative for a poor fit.
accuracy = r2 * 100

print(f"Mean Squared Error: {mse}")
print(f"R-squared: {r2}")
print(f"Accuracy: {accuracy}%")

# Cross-validation to check the stability of the model
# Cross-validate the same preprocessing as the hold-out model above: the
# scaler lives inside a pipeline so it is refitted on the training part of
# every fold. Scoring the bare LinearRegression on unscaled X_poly would
# validate a different setup than the one evaluated on the test split.
# NOTE(review): cv=10 uses consecutive, unshuffled folds for a regressor; if
# the rows of the CSV are ordered, consider passing a shuffled KFold instead.
cv_pipeline = make_pipeline(StandardScaler(), LinearRegression())
cv_scores = cross_val_score(cv_pipeline, X_poly, y, cv=10, scoring='r2')
mean_cv_score = cv_scores.mean() * 100

print(f"Cross-validated R-squared: {mean_cv_score}%")

# Comparison of predicted values and actual values
# y_test keeps its original row index, so each row can be traced back to `df`.
comparison = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print(comparison)


Mean Squared Error: 255009.6291532833
R-squared: 0.559328612421284
Accuracy: 55.932861242128396%
Cross-validated R-squared: 31.244654985675822%
     Actual    Predicted
649     299   359.345718
761     120   184.328956
545     167   165.092774
367    1462  2198.977979
361     870   561.168607
..      ...          ...
344    1315  1989.860507
581    1793  2375.795426
872     150   198.100193
449     718   872.669076
789     431   295.872190

[183 rows x 2 columns]


In [None]:
pip install joblib




In [None]:
import joblib

['scaler.pkl']

In [None]:
import joblib

# Persist every fitted object so that the polynomial expansion, the scaling
# and the regression can be reloaded together with the encoders.
fitted_objects = (
    (model, 'linear_regression_model.pkl'),
    (scaler, 'scaler.pkl'),
    (poly, 'poly.pkl'),
)
for fitted_object, filename in fitted_objects:
    joblib.dump(fitted_object, filename)

# Kept as the final expression so the cell still echoes the written file name.
joblib.dump(label_encoders, 'label_encoders.pkl')

['label_encoders.pkl']