# Load Dataset

In [26]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import confusion_matrix,accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,LabelEncoder

In [27]:
df=pd.read_csv('automobile.csv')

In [28]:
df=df.dropna()

# Remove Null value

In [29]:
df.isnull().sum()

symboling            0
normalized-losses    0
make                 0
fuel-type            0
aspiration           0
num-of-doors         0
body-style           0
drive-wheels         0
engine-location      0
wheel-base           0
length               0
width                0
height               0
curb-weight          0
engine-type          0
num-of-cylinders     0
engine-size          0
fuel-system          0
bore                 0
stroke               0
compression-ratio    0
horsepower           0
peak-rpm             0
city-mpg             0
highway-mpg          0
price                0
dtype: int64

# Label Encoding

In [30]:
categorical_features = ['fuel-type','make','aspiration','body-style','drive-wheels','fuel-system','num-of-doors','engine-location','engine-type','num-of-cylinders','bore','stroke','peak-rpm','drive-wheels','horsepower']

# Initialize LabelEncoder
label_encoders = {}

for feature in categorical_features:
    label_encoders[feature] = LabelEncoder()
    df.loc[:, feature] = label_encoders[feature].fit_transform(df[feature])

In [31]:
df['num-of-cylinders']

3      2
4      1
6      1
8      1
9      2
      ..
197    2
198    3
199    3
200    2
201    2
Name: num-of-cylinders, Length: 164, dtype: object

# Normalization

In [32]:
from sklearn.preprocessing import MinMaxScaler
scaler=MinMaxScaler()
df_scaled=scaler.fit_transform(df)

In [33]:
x=df.drop('make',axis=1)

In [34]:
y=df['make']

In [35]:
x_scaled=scaler.fit_transform(x)

# Splitting of data

In [47]:
x_train,y_train,x_test,y_test=train_test_split(x_scaled,y,test_size=0.3,random_state=42)

# Model Training

In [48]:
model=LinearRegression()

In [49]:
model.fit(x_train,y_train)

# Model Prediction

In [None]:
y_pred=model.predict(x_test)

In [66]:
y_pred

array([[0.67283895, 0.19959093, 1.02028829, ..., 0.33329765, 0.39142316,
        0.30360927],
       [0.52717996, 0.04860346, 1.03092976, ..., 0.34155674, 0.42412204,
        0.12802746],
       [0.69889684, 0.27722848, 0.96138858, ..., 0.20897369, 0.26710106,
        0.23721547],
       ...,
       [0.70209454, 0.28086821, 0.96083941, ..., 0.20955054, 0.26816185,
        0.23450291],
       [0.53663961, 0.22254091, 0.96217095, ..., 0.26281832, 0.32582762,
        0.16002808],
       [0.61282942, 0.4635326 , 0.75660108, ..., 0.24609068, 0.30135858,
        0.34249527]])

# Accuracy

In [67]:
print(accuracy_score(y_pred,y_test))

0.895643190830941


# Explanation

In [None]:
# Here,in this case based on cars dataset we have tell that which model can be suggested based on features so I have used Linear Regression model 
# because here target value is car model prediction i.e. a continuous value. So, first i have uploaded dataset then removed null value,then done 
# preprocessing of the dataset using labelEncoding technique so that all the data is converted into paricular format.
# STEP 1 : Load dataaset
# STEP 2 : Remove/check null values
# STEP 3 : Do PreProcessing using LabelEncoding technique
# STEP 4 : Scaled data using MinMaxScaler
# STEP 5 : Divided data in 70-30 ratio as test and train data
# STEP 6 : Trained Linear Regression model
# STEP 7: Tested the data
# STEP 8 : Printed the accuracy
