In [55]:
from ucimlrepo import fetch_ucirepo

# Download the UCI ML Repository dataset with id 10 (the "Automobile" dataset).
automobile = fetch_ucirepo(id=10)

# Split the payload into the predictor table and the target table
# (both are pandas DataFrames).
features, targets = automobile.data.features, automobile.data.targets

# Show the dataset-level metadata first, then the per-variable description.
print(automobile.metadata)
print(automobile.variables)


{'uci_id': 10, 'name': 'Automobile', 'repository_url': 'https://archive.ics.uci.edu/dataset/10/automobile', 'data_url': 'https://archive.ics.uci.edu/static/public/10/data.csv', 'abstract': "From 1985 Ward's Automotive Yearbook", 'area': 'Other', 'tasks': ['Regression'], 'characteristics': ['Multivariate'], 'num_instances': 205, 'num_features': 25, 'feature_types': ['Categorical', 'Integer', 'Real'], 'demographics': [], 'target_col': ['symboling'], 'index_col': None, 'has_missing_values': 'yes', 'missing_values_symbol': 'NaN', 'year_of_dataset_creation': 1985, 'last_updated': 'Thu Aug 10 2023', 'dataset_doi': '10.24432/C5B01C', 'creators': ['Jeffrey Schlimmer'], 'intro_paper': None, 'additional_info': {'summary': 'This data set consists of three types of entities: (a) the specification of an auto in terms of various characteristics, (b) its assigned insurance risk rating, (c) its normalized losses in use as compared to other cars.  The second rating corresponds to the degree to which th

In [56]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import hvplot.pandas

from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.linear_model import LinearRegression

%matplotlib inline




In [57]:
# (rows, columns) of the feature table.
features.shape

(205, 25)

In [58]:
# (rows, columns) of the target table.
targets.shape

(205, 1)

In [59]:
# Combine features and targets column-wise into a single frame. join() aligns rows
# on the index, and how='inner' keeps only index labels present in both frames.
autoMob = features.join(targets, how= 'inner')
# Display the combined frame.
autoMob

Unnamed: 0,price,highway-mpg,city-mpg,peak-rpm,horsepower,compression-ratio,stroke,bore,fuel-system,engine-size,...,wheel-base,engine-location,drive-wheels,body-style,num-of-doors,aspiration,fuel-type,make,normalized-losses,symboling
0,13495.0,27,21,5000.0,111.0,9.0,2.68,3.47,mpfi,130,...,88.6,front,rwd,convertible,2.0,std,gas,alfa-romero,,3
1,16500.0,27,21,5000.0,111.0,9.0,2.68,3.47,mpfi,130,...,88.6,front,rwd,convertible,2.0,std,gas,alfa-romero,,3
2,16500.0,26,19,5000.0,154.0,9.0,3.47,2.68,mpfi,152,...,94.5,front,rwd,hatchback,2.0,std,gas,alfa-romero,,1
3,13950.0,30,24,5500.0,102.0,10.0,3.40,3.19,mpfi,109,...,99.8,front,fwd,sedan,4.0,std,gas,audi,164.0,2
4,17450.0,22,18,5500.0,115.0,8.0,3.40,3.19,mpfi,136,...,99.4,front,4wd,sedan,4.0,std,gas,audi,164.0,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
200,16845.0,28,23,5400.0,114.0,9.5,3.15,3.78,mpfi,141,...,109.1,front,rwd,sedan,4.0,std,gas,volvo,95.0,-1
201,19045.0,25,19,5300.0,160.0,8.7,3.15,3.78,mpfi,141,...,109.1,front,rwd,sedan,4.0,turbo,gas,volvo,95.0,-1
202,21485.0,23,18,5500.0,134.0,8.8,2.87,3.58,mpfi,173,...,109.1,front,rwd,sedan,4.0,std,gas,volvo,95.0,-1
203,22470.0,27,26,4800.0,106.0,23.0,3.40,3.01,idi,145,...,109.1,front,rwd,sedan,4.0,turbo,diesel,volvo,95.0,-1


In [60]:
# Column dtypes and non-null counts; columns with fewer non-null values than the
# number of entries contain missing data.
autoMob.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 205 entries, 0 to 204
Data columns (total 26 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   price              201 non-null    float64
 1   highway-mpg        205 non-null    int64  
 2   city-mpg           205 non-null    int64  
 3   peak-rpm           203 non-null    float64
 4   horsepower         203 non-null    float64
 5   compression-ratio  205 non-null    float64
 6   stroke             201 non-null    float64
 7   bore               201 non-null    float64
 8   fuel-system        205 non-null    object 
 9   engine-size        205 non-null    int64  
 10  num-of-cylinders   205 non-null    int64  
 11  engine-type        205 non-null    object 
 12  curb-weight        205 non-null    int64  
 13  height             205 non-null    float64
 14  width              205 non-null    float64
 15  length             205 non-null    float64
 16  wheel-base         205 non

In [61]:
# Count each distinct row-wise pattern of missing (True) / present (False) values
# across all columns. For a simpler per-column total of missing values,
# autoMob.isna().sum() is easier to read.
autoMob.isna().value_counts()

price  highway-mpg  city-mpg  peak-rpm  horsepower  compression-ratio  stroke  bore   fuel-system  engine-size  num-of-cylinders  engine-type  curb-weight  height  width  length  wheel-base  engine-location  drive-wheels  body-style  num-of-doors  aspiration  fuel-type  make   normalized-losses  symboling
False  False        False     False     False       False              False   False  False        False        False             False        False        False   False  False   False       False            False         False       False         False       False      False  False              False        159
                                                                                                                                                                                                                                                                                      True               False         34
                                                                 

In [62]:
# Drop every row that has a missing value in any column, modifying autoMob in place.
# In this run that keeps 159 of the original 205 rows. The index is not reset, so
# the surviving rows keep their original (now non-contiguous) labels.
autoMob.dropna(inplace=True)

In [63]:
# Per-column check: True if the column still contains at least one missing value.
# Every entry should be False after the rows with missing values were dropped.
autoMob.isna().any()

price                False
highway-mpg          False
city-mpg             False
peak-rpm             False
horsepower           False
compression-ratio    False
stroke               False
bore                 False
fuel-system          False
engine-size          False
num-of-cylinders     False
engine-type          False
curb-weight          False
height               False
width                False
length               False
wheel-base           False
engine-location      False
drive-wheels         False
body-style           False
num-of-doors         False
aspiration           False
fuel-type            False
make                 False
normalized-losses    False
symboling            False
dtype: bool

In [64]:
# Boolean Series flagging rows that exactly repeat an earlier row.
# The truncated display only shows the first and last few rows, so it cannot confirm
# that no duplicates exist; autoMob.duplicated().sum() would give the total.
autoMob.duplicated()

3      False
4      False
6      False
8      False
10     False
       ...  
200    False
201    False
202    False
203    False
204    False
Length: 159, dtype: bool

In [71]:
# Inspect the raw engine-location column (string/object values) before it is encoded.
autoMob['engine-location']

3      front
4      front
6      front
8      front
10     front
       ...  
200    front
201    front
202    front
203    front
204    front
Name: engine-location, Length: 159, dtype: object

In [72]:
from sklearn.preprocessing import LabelEncoder

# Categorical source column -> name of the integer-coded column derived from it.
# Dict insertion order is the order in which the new columns are appended.
encodedColumnNames = {
    "fuel-system": "encFuelSystem",
    "engine-type": "encEngineType",
    "engine-location": "encEngineLoc",
    "drive-wheels": "encDriveWheel",
    "body-style": "encBodyStyle",
    "aspiration": "encAspiration",
    "make": "encMake",
    "fuel-type": "encFuelType",
}

# Fit a fresh encoder per column and keep each one, keyed by its source column.
# Re-fitting a single shared encoder discards every label mapping except the last,
# which makes it impossible to decode the integer codes (inverse_transform) or to
# encode new rows consistently later on.
labelEncoders = {}
for sourceCol, encodedCol in encodedColumnNames.items():
    le = LabelEncoder()
    autoMob[encodedCol] = le.fit_transform(autoMob[sourceCol])
    labelEncoders[sourceCol] = le
# After the loop `le` is still bound to the fuel-type encoder, as it was before.

# NOTE(review): these integer codes carry no real ordering. If the enc* columns are
# used as inputs to a linear model, one-hot encoding is usually the safer choice —
# confirm against the modelling cells.
autoMob


Unnamed: 0,price,highway-mpg,city-mpg,peak-rpm,horsepower,compression-ratio,stroke,bore,fuel-system,engine-size,...,normalized-losses,symboling,encFuelSystem,encEngineType,encEngineLoc,encDriveWheel,encBodyStyle,encAspiration,encMake,encFuelType
3,13950.0,30,24,5500.0,102.0,10.0,3.40,3.19,mpfi,109,...,164.0,2,4,2,0,1,3,0,0,1
4,17450.0,22,18,5500.0,115.0,8.0,3.40,3.19,mpfi,136,...,164.0,2,4,2,0,0,3,0,0,1
6,17710.0,25,19,5500.0,110.0,8.5,3.40,3.19,mpfi,136,...,158.0,1,4,2,0,1,3,0,0,1
8,23875.0,20,17,5500.0,140.0,8.3,3.40,3.13,mpfi,131,...,158.0,1,4,2,0,1,3,1,0,1
10,16430.0,29,23,5800.0,101.0,8.8,2.80,3.50,mpfi,108,...,192.0,2,4,2,0,2,3,0,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
200,16845.0,28,23,5400.0,114.0,9.5,3.15,3.78,mpfi,141,...,95.0,-1,4,2,0,2,3,0,17,1
201,19045.0,25,19,5300.0,160.0,8.7,3.15,3.78,mpfi,141,...,95.0,-1,4,2,0,2,3,1,17,1
202,21485.0,23,18,5500.0,134.0,8.8,2.87,3.58,mpfi,173,...,95.0,-1,4,4,0,2,3,0,17,1
203,22470.0,27,26,4800.0,106.0,23.0,3.40,3.01,idi,145,...,95.0,-1,2,2,0,2,3,1,17,0
