In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from keras import Sequential
from keras.layers import Dense
from sklearn.datasets import load_iris
from tensorflow.keras import Sequential
from sklearn.utils import all_estimators
from tensorflow.keras.layers import Dense
from urllib.request import urlopen, urlretrieve
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [2]:
## 데이터 로딩
URL = 'https://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data'

FILE = 'mpg_data'

ret = urlretrieve(URL, FILE)

In [3]:
# re(정규표현식) => \s : 화이트스페이스
df=pd.read_csv(URL, sep='\s+', header=None)
print(df.head())
print('---------------------------------------------------')
print(df.info())

      0  1      2      3       4     5   6  7                          8
0  18.0  8  307.0  130.0  3504.0  12.0  70  1  chevrolet chevelle malibu
1  15.0  8  350.0  165.0  3693.0  11.5  70  1          buick skylark 320
2  18.0  8  318.0  150.0  3436.0  11.0  70  1         plymouth satellite
3  16.0  8  304.0  150.0  3433.0  12.0  70  1              amc rebel sst
4  17.0  8  302.0  140.0  3449.0  10.5  70  1                ford torino
---------------------------------------------------
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 398 entries, 0 to 397
Data columns (total 9 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   0       398 non-null    float64
 1   1       398 non-null    int64  
 2   2       398 non-null    float64
 3   3       398 non-null    object 
 4   4       398 non-null    float64
 5   5       398 non-null    float64
 6   6       398 non-null    int64  
 7   7       398 non-null    int64  
 8   8       398 non-null    object 

In [4]:
print(df.isnull().sum())
print(df.describe())

0    0
1    0
2    0
3    0
4    0
5    0
6    0
7    0
8    0
dtype: int64
                0           1           2            4           5  \
count  398.000000  398.000000  398.000000   398.000000  398.000000   
mean    23.514573    5.454774  193.425879  2970.424623   15.568090   
std      7.815984    1.701004  104.269838   846.841774    2.757689   
min      9.000000    3.000000   68.000000  1613.000000    8.000000   
25%     17.500000    4.000000  104.250000  2223.750000   13.825000   
50%     23.000000    4.000000  148.500000  2803.500000   15.500000   
75%     29.000000    8.000000  262.000000  3608.000000   17.175000   
max     46.600000    8.000000  455.000000  5140.000000   24.800000   

                6           7  
count  398.000000  398.000000  
mean    76.010050    1.572864  
std      3.697627    0.802055  
min     70.000000    1.000000  
25%     73.000000    1.000000  
50%     76.000000    1.000000  
75%     79.000000    2.000000  
max     82.000000    3.000000  


In [5]:
df.columns = ['mpg','cylinders', 'displacement', 'horsepower', 'weight', 'acceleration',
              'model year', 'origin', 'car name']
df.head(3)

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model year,origin,car name
0,18.0,8,307.0,130.0,3504.0,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,165.0,3693.0,11.5,70,1,buick skylark 320
2,18.0,8,318.0,150.0,3436.0,11.0,70,1,plymouth satellite


In [6]:
df['horsepower'].unique()
df['horsepower'] = df['horsepower'].replace('?',np.nan)
df['horsepower'] = df['horsepower'].astype(float)
df['horsepower'].mean()
df['horsepower'] = df['horsepower'].replace(np.nan,'104.5')
df['horsepower']=df.horsepower.astype('float64')

In [7]:
df['mpg']=df['mpg']*0.62

In [8]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 398 entries, 0 to 397
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   mpg           398 non-null    float64
 1   cylinders     398 non-null    int64  
 2   displacement  398 non-null    float64
 3   horsepower    398 non-null    float64
 4   weight        398 non-null    float64
 5   acceleration  398 non-null    float64
 6   model year    398 non-null    int64  
 7   origin        398 non-null    int64  
 8   car name      398 non-null    object 
dtypes: float64(5), int64(3), object(1)
memory usage: 28.1+ KB


In [9]:
Le = LabelEncoder()
df['cylinders'] = Le.fit_transform(df['cylinders'])
df['model year'] = Le.fit_transform(df['model year'])
df['origin'] = Le.fit_transform(df['origin'])

In [10]:
data = df.drop(['mpg', 'car name'], axis='columns')
target = df['mpg']

In [11]:
train_x, test_x, train_y, test_y = train_test_split(data, target, test_size=0.2,
                                                   random_state=42)

In [12]:
ss = StandardScaler()
train_x_scaled = ss.fit_transform(train_x)
test_x_scaled = ss.transform(test_x)

In [13]:
models = all_estimators(type_filter='regressor')

import warnings
warnings.filterwarnings('ignore')

result = []
scores = []
for name, model in models:
    
        try:
        # 모델 객체 생성
            md = model()
        # 학습
            md.fit(train_x_scaled, train_y)
        # 평가
            result.append((name, md.score(train_x_scaled, train_y),
                           md.score(test_x_scaled, test_y)))
                    
        except :
            pass

In [14]:
scores=pd.DataFrame(result,columns=['model','train','test']).sort_values('test',ascending=False)
scores

Unnamed: 0,model,train,test
13,HistGradientBoostingRegressor,0.963283,0.907543
35,RadiusNeighborsRegressor,0.908365,0.906146
9,ExtraTreesRegressor,1.0,0.906131
36,RandomForestRegressor,0.981444,0.904803
12,GradientBoostingRegressor,0.972141,0.902353
15,KNeighborsRegressor,0.896811,0.899344
32,PoissonRegressor,0.862124,0.889617
2,BaggingRegressor,0.974678,0.886004
27,NuSVR,0.863029,0.884121
40,SVR,0.859805,0.884112


In [15]:
rf = RandomForestRegressor()

rf.fit(train_x_scaled, train_y)

In [16]:
print('훈련 점수  : ', rf.score(train_x_scaled, train_y))
print('테스트 점수 : ', rf.score(test_x_scaled, test_y))

훈련 점수  :  0.9806730193349851
테스트 점수 :  0.9086246926085751


In [17]:
model1=Sequential(name="autompg")

In [18]:
l1=Dense(10, activation='relu', input_shape=(7,))
l2=Dense(1, activation='linear')

In [19]:
model1.add(l1)
model1.add(l2)

In [20]:
model1.summary()

Model: "autompg"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 10)                80        
                                                                 
 dense_1 (Dense)             (None, 1)                 11        
                                                                 
Total params: 91
Trainable params: 91
Non-trainable params: 0
_________________________________________________________________


In [21]:
model1.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_squared_error'])

In [22]:
model1.fit(train_x_scaled, train_y, epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x2d7dc38e790>

In [23]:
model1.evaluate(test_x_scaled, test_y)



[58.819496154785156, 58.819496154785156]

In [24]:
model1.predict(test_x_scaled)



array([[14.745116 ],
       [10.022098 ],
       [ 3.3293698],
       [ 6.0064225],
       [ 6.9294868],
       [10.216055 ],
       [ 7.4015265],
       [ 9.7058325],
       [ 4.262925 ],
       [ 2.6119936],
       [ 8.895755 ],
       [15.172734 ],
       [ 9.068445 ],
       [ 6.8543644],
       [ 5.877258 ],
       [ 9.421017 ],
       [ 8.924745 ],
       [ 2.9206195],
       [ 7.3454595],
       [16.367037 ],
       [ 7.75259  ],
       [ 3.2888522],
       [ 2.740722 ],
       [ 8.514948 ],
       [ 6.4805927],
       [17.764566 ],
       [ 7.6206536],
       [ 5.4240265],
       [ 3.0283241],
       [ 8.590225 ],
       [ 7.4802036],
       [12.876546 ],
       [ 4.448713 ],
       [ 8.661678 ],
       [14.939783 ],
       [ 9.460077 ],
       [ 2.3466144],
       [ 4.761491 ],
       [ 6.783739 ],
       [ 8.945149 ],
       [ 2.7892773],
       [10.948491 ],
       [ 4.100364 ],
       [ 9.600062 ],
       [ 7.3857746],
       [13.096575 ],
       [ 7.685446 ],
       [ 5.41