In [1]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import joblib

# Describe the sample customers one record per row, then pivot the records
# into the column-oriented mapping that pandas consumes.
column_names = [
    'Age', 'Profession', 'Marrital Status', 'Education', 'No of Dependents',
    'Personal loan', 'House Loan', 'Wife Working', 'Salary', 'Wife Salary',
    'Total Salary', 'Make', 'Price',
]
customer_rows = [
    (27, 'Salaried', 'Single', 'Post Graduate', 0, 'Yes', 'No', 'No', 800000, 0, 800000, 'i20', 800000),
    (35, 'Salaried', 'Married', 'Post Graduate', 2, 'Yes', 'Yes', 'Yes', 1400000, 600000, 2000000, 'Ciaz', 1000000),
]

# `data` maps each column name to the list of that column's values,
# in the same order as `column_names`.
data = {name: list(values) for name, values in zip(column_names, zip(*customer_rows))}

df = pd.DataFrame(data)

# Replace every text-valued column with integer codes. One encoder is kept
# per column so the text-to-code mapping learned here remains available.
categorical_features = ['Profession', 'Marrital Status', 'Education', 'Personal loan', 'House Loan', 'Wife Working', 'Make']
label_encoders = {column: LabelEncoder() for column in categorical_features}

for column, encoder in label_encoders.items():
    # Fits the encoder on the column and overwrites that column in `df`
    # with the resulting integer codes.
    df[column] = encoder.fit_transform(df[column])

# Separate the target column ('Price') from the predictor columns
y = df['Price']
X = df.drop(columns='Price')

# Hold out half of the rows for evaluation; the fixed seed makes the split
# reproducible. With only 2 samples this is illustrative rather than a
# meaningful validation.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.5,
    random_state=42,
)

# Learn the scaling statistics from the training partition only, then apply
# that same fitted transformation to both partitions
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Fit a linear regression on the scaled training features
model = LinearRegression().fit(X_train, y_train)

# Predict the held-out rows and report the mean squared error against the
# true prices
y_pred = model.predict(X_test)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print(f'Mean Squared Error: {mse}')

# Persist every fitted artifact required to score new data later: the
# regressor, the feature scaler, and the per-column label encoders.
joblib.dump(model, 'car_price_model.pkl')
joblib.dump(scaler, 'scaler.pkl')
# The encoders hold the text-to-integer mapping used for the categorical
# columns; without them, raw category strings in new data cannot be
# converted to the codes the scaler and model were fitted on.
joblib.dump(label_encoders, 'label_encoders.pkl')

print('Model and scaler saved successfully!')


Mean Squared Error: 40000000000.0
Model and scaler saved successfully!
