In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn import metrics

# Loading the data
car_data = pd.read_csv('data/cardata.csv')
car_data.head()
car_data.info()

# Encoding Columns
car_data.replace(
    {'Fuel_Type': {'Petrol': 0, 'Diesel': 1, 'CNG': 2}}, inplace=True)
car_data.replace({'Seller_Type': {'Dealer': 0, 'Individual': 1}}, inplace=True)
car_data.replace({'Transmission': {'Manual': 0, 'Automatic': 1}}, inplace=True)

# Discretize Selling_Price into two categories: low price (0) and high price (1)
car_data['Price_Category'] = pd.cut(
    car_data['Selling_Price'], bins=[-float('inf'), 5, float('inf')], labels=[0, 1])

# corrMatrix = car_data.corr()
# sns.heatmap(corrMatrix, annot=True, cmap="viridis")
# plt.show()

X = car_data.drop(['Car_Name', 'Selling_Price'], axis=1)
Y = car_data['Selling_Price']
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42)

# Loading the logistic regression model
log_reg_model = LogisticRegression()

# Fitting the model to the dataset
log_reg_model.fit(X_train, Y_train)

# Prediction on Training data
training_data_prediction = log_reg_model.predict(X_train)

# Accuracy Score
train_accuracy = metrics.accuracy_score(Y_train, training_data_prediction)
print("Accuracy Score - Training: ", train_accuracy)

Y_pred = log_reg_model.predict(X_test)
test_accuracy = metrics.accuracy_score(Y_test, Y_pred)
print("Accuracy Score - Test: ", test_accuracy)

sns.regplot(Y_test, Y_pred, scatter_kws={
            "color": "green"}, line_kws={"color": "blue"})


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 301 entries, 0 to 300
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Car_Name       301 non-null    object 
 1   Year           301 non-null    int64  
 2   Selling_Price  301 non-null    float64
 3   Present_Price  301 non-null    float64
 4   Kms_Driven     301 non-null    int64  
 5   Fuel_Type      301 non-null    object 
 6   Seller_Type    301 non-null    object 
 7   Transmission   301 non-null    object 
 8   Owner          301 non-null    int64  
dtypes: float64(2), int64(3), object(4)
memory usage: 21.3+ KB


ValueError: Unknown label type: 'continuous'

In [2]:
import pickle
from sklearn.linear_model import LinearRegression


# Saving the model to a file
filename = 'log_reg_model.pkl'
with open(filename, 'wb') as file:
    pickle.dump(log_reg_model, file)
