In [1]:
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import mglearn
%matplotlib inline
import seaborn as sns
import platform
from matplotlib import font_manager , rc

if platform.system() == 'Darwin':
  rc('font' , family = 'AppleGothic')
elif platform.system() == 'Windows':
  path = 'C:/Windows/Fonts/malgun.ttf'
  font_name = font_manager.FontProperties(fname = path).get_name()
  rc('font' , family = font_name)
else:
  print('모름')
plt.rcParams['axes.unicode_minus'] = False
import warnings
warnings.filterwarnings('ignore')

In [2]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score
from sklearn.metrics import f1_score, confusion_matrix, roc_curve, precision_recall_curve
def get(y_test, pred=None, pred_proba=None):
    confusion = confusion_matrix(y_test, pred)
    accuracy = accuracy_score(y_test, pred)
    precision = precision_score(y_test, pred)
    recall = recall_score(y_test, pred)
    f1 = f1_score(y_test, pred)
    #roc_auc = roc_auc_score(y_test, pred_proba)

    print('오차 행렬(혼돈 행렬)')
    print(confusion)

    print(f'정확도:{accuracy:.4f}, 정밀도:{precision:.4f}, 재현율:{recall:.4f}, F1:{f1:.4f}')

def model_fit(model):
    model.fit(train_input , train_target)
    pred = model.predict(test_input)
    return get(test_target , pred)

In [3]:
from sklearn.metrics import mean_squared_error , mean_absolute_error
def rmsle(y , pred):
    log_y = np.log1p(y)
    log_pred = np.log1p(pred)
    squared_error = (log_y - log_pred)**2
    rmsle = np.sqrt(np.mean(squared_error))
    return rmsle

def rmse(y , pred):
    return np.sqrt(mean_squared_error(y , pred))

def evaluate_regr(y , pred):
    rmsle_val = rmsle(y , pred)
    rmse_val = rmse(y , pred)
    
    mae_val = mean_absolute_error(y,pred)
    print('RMSLE : {0:.3f} , RMSE : {1:.3f} , MAE : {2:.3F}'.format(rmsle_val , rmse_val , mae_val))

In [4]:
path = 'C:/k_digital/machine/source/bike-sharing-demand'

In [5]:
bike = pd.read_csv(path + '/train.csv')
bike

Unnamed: 0,datetime,season,holiday,workingday,weather,temp,atemp,humidity,windspeed,casual,registered,count
0,2011-01-01 00:00:00,1,0,0,1,9.84,14.395,81,0.0000,3,13,16
1,2011-01-01 01:00:00,1,0,0,1,9.02,13.635,80,0.0000,8,32,40
2,2011-01-01 02:00:00,1,0,0,1,9.02,13.635,80,0.0000,5,27,32
3,2011-01-01 03:00:00,1,0,0,1,9.84,14.395,75,0.0000,3,10,13
4,2011-01-01 04:00:00,1,0,0,1,9.84,14.395,75,0.0000,0,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...
10881,2012-12-19 19:00:00,4,0,1,1,15.58,19.695,50,26.0027,7,329,336
10882,2012-12-19 20:00:00,4,0,1,1,14.76,17.425,57,15.0013,10,231,241
10883,2012-12-19 21:00:00,4,0,1,1,13.94,15.910,61,15.0013,4,164,168
10884,2012-12-19 22:00:00,4,0,1,1,13.94,17.425,61,6.0032,12,117,129


In [6]:
bike.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10886 entries, 0 to 10885
Data columns (total 12 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   datetime    10886 non-null  object 
 1   season      10886 non-null  int64  
 2   holiday     10886 non-null  int64  
 3   workingday  10886 non-null  int64  
 4   weather     10886 non-null  int64  
 5   temp        10886 non-null  float64
 6   atemp       10886 non-null  float64
 7   humidity    10886 non-null  int64  
 8   windspeed   10886 non-null  float64
 9   casual      10886 non-null  int64  
 10  registered  10886 non-null  int64  
 11  count       10886 non-null  int64  
dtypes: float64(3), int64(8), object(1)
memory usage: 1020.7+ KB


In [7]:
bike.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
season,10886.0,2.506614,1.116174,1.0,2.0,3.0,4.0,4.0
holiday,10886.0,0.028569,0.166599,0.0,0.0,0.0,0.0,1.0
workingday,10886.0,0.680875,0.466159,0.0,0.0,1.0,1.0,1.0
weather,10886.0,1.418427,0.633839,1.0,1.0,1.0,2.0,4.0
temp,10886.0,20.23086,7.79159,0.82,13.94,20.5,26.24,41.0
atemp,10886.0,23.655084,8.474601,0.76,16.665,24.24,31.06,45.455
humidity,10886.0,61.88646,19.245033,0.0,47.0,62.0,77.0,100.0
windspeed,10886.0,12.799395,8.164537,0.0,7.0015,12.998,16.9979,56.9969
casual,10886.0,36.021955,49.960477,0.0,4.0,17.0,49.0,367.0
registered,10886.0,155.552177,151.039033,0.0,36.0,118.0,222.0,886.0


In [8]:
from datetime import datetime
bike.datetime = bike.datetime.apply(lambda x : datetime.strptime(x, '%Y-%m-%d %H:%M:%S'))
bike['year'] = bike.datetime.apply(lambda x : x.year)
bike['month'] = bike.datetime.apply(lambda x : x.month)
bike['day'] = bike.datetime.apply(lambda x : x.day)
bike['hour'] = bike.datetime.apply(lambda x: x.hour)

In [9]:
bike.drop(['datetime','casual','registered'] , axis = 1 , inplace = True)

In [10]:
bike.columns

Index(['season', 'holiday', 'workingday', 'weather', 'temp', 'atemp',
       'humidity', 'windspeed', 'count', 'year', 'month', 'day', 'hour'],
      dtype='object')

In [11]:
bike = bike[['season', 'holiday', 'workingday', 'weather', 'temp', 'atemp',
       'humidity', 'windspeed' , 'year', 'month', 'day', 'hour' , 'count']]

In [12]:
data = bike.iloc[:,:-1]
target = bike.iloc[:,-1]

In [13]:
from sklearn.model_selection import train_test_split
train_input , test_input , train_target , test_target = train_test_split(data , target , test_size = 0.2 , random_state = 0)

In [14]:
from sklearn.linear_model import LinearRegression , Ridge , Lasso
lr = LinearRegression()
rid = Ridge()
las = Lasso()

In [15]:
model = [lr,rid,las]

In [16]:
for i in model:
    i.fit(train_input , train_target)
    evaluate_regr(test_target , i.predict(test_input))

RMSLE : 1.191 , RMSE : 141.973 , MAE : 106.387
RMSLE : 1.191 , RMSE : 141.974 , MAE : 106.386
RMSLE : 1.179 , RMSE : 142.031 , MAE : 106.077


In [17]:
target_log = np.log1p(target)

In [18]:
train_input , test_input , train_target , test_target = train_test_split(data , target_log , test_size = 0.2 , random_state = 0)

In [19]:
for i in model:
    i.fit(train_input , train_target)
    evaluate_regr(test_target , i.predict(test_input))

RMSLE : 0.236 , RMSE : 1.026 , MAE : 0.810
RMSLE : 0.236 , RMSE : 1.026 , MAE : 0.810
RMSLE : 0.251 , RMSE : 1.081 , MAE : 0.858


In [20]:
params = {'alpha' : [0.001 , 0.01 , 0.1 , 1 , 10 , 100]}

In [21]:
from sklearn.model_selection import GridSearchCV

gs = GridSearchCV(rid , params)

gs.fit(train_input , train_target)

In [22]:
evaluate_regr(test_target , gs.predict(test_input))

RMSLE : 0.236 , RMSE : 1.026 , MAE : 0.810


In [23]:
gs.best_params_

{'alpha': 10}

In [24]:
a = Ridge(alpha = 10)
a.fit(train_input , train_target)
evaluate_regr(test_target , a.predict(test_input))

RMSLE : 0.236 , RMSE : 1.026 , MAE : 0.810


In [25]:
test = pd.read_csv(path + '/test.csv')

In [26]:
test

Unnamed: 0,datetime,season,holiday,workingday,weather,temp,atemp,humidity,windspeed
0,2011-01-20 00:00:00,1,0,1,1,10.66,11.365,56,26.0027
1,2011-01-20 01:00:00,1,0,1,1,10.66,13.635,56,0.0000
2,2011-01-20 02:00:00,1,0,1,1,10.66,13.635,56,0.0000
3,2011-01-20 03:00:00,1,0,1,1,10.66,12.880,56,11.0014
4,2011-01-20 04:00:00,1,0,1,1,10.66,12.880,56,11.0014
...,...,...,...,...,...,...,...,...,...
6488,2012-12-31 19:00:00,1,0,1,2,10.66,12.880,60,11.0014
6489,2012-12-31 20:00:00,1,0,1,2,10.66,12.880,60,11.0014
6490,2012-12-31 21:00:00,1,0,1,1,10.66,12.880,60,11.0014
6491,2012-12-31 22:00:00,1,0,1,1,10.66,13.635,56,8.9981


In [27]:
test.datetime = test.datetime.apply(lambda x : datetime.strptime(x, '%Y-%m-%d %H:%M:%S'))
test['year'] = test.datetime.apply(lambda x : x.year)
test['month'] = test.datetime.apply(lambda x : x.month)
test['day'] = test.datetime.apply(lambda x : x.day)
test['hour'] = test.datetime.apply(lambda x: x.hour)

In [28]:
test.drop(['datetime'] , axis = 1 , inplace = True)

In [29]:
test.shape

(6493, 12)

In [30]:
prediction = []
for i in model:
    prediction.append(i.predict(test))

In [31]:
m = np.mean(prediction , axis = 0)

In [32]:
np.exp(m) - 1

array([ 15.52761398,  17.07889521,  18.81166608, ..., 258.87475806,
       306.04812521, 295.2344609 ])

In [33]:
sub = pd.read_csv(path + '/sampleSubmission.csv')

In [34]:
sub['count'] = sub['count'].apply(lambda x : round(x))

In [35]:
sub['count'] = np.exp(m) - 1

In [36]:
sub.to_csv('pred.csv' , index = False)

In [37]:
total = pd.concat([data , test])

In [38]:
data.shape

(10886, 12)

In [39]:
total

Unnamed: 0,season,holiday,workingday,weather,temp,atemp,humidity,windspeed,year,month,day,hour
0,1,0,0,1,9.84,14.395,81,0.0000,2011,1,1,0
1,1,0,0,1,9.02,13.635,80,0.0000,2011,1,1,1
2,1,0,0,1,9.02,13.635,80,0.0000,2011,1,1,2
3,1,0,0,1,9.84,14.395,75,0.0000,2011,1,1,3
4,1,0,0,1,9.84,14.395,75,0.0000,2011,1,1,4
...,...,...,...,...,...,...,...,...,...,...,...,...
6488,1,0,1,2,10.66,12.880,60,11.0014,2012,12,31,19
6489,1,0,1,2,10.66,12.880,60,11.0014,2012,12,31,20
6490,1,0,1,1,10.66,12.880,60,11.0014,2012,12,31,21
6491,1,0,1,1,10.66,13.635,56,8.9981,2012,12,31,22


In [40]:
from sklearn.preprocessing import StandardScaler

In [41]:
ss = StandardScaler()
total['temp'] = ss.fit_transform(total.temp.values.reshape(-1,1))

In [42]:
ss = StandardScaler()
total['atemp'] = ss.fit_transform(total.atemp.values.reshape(-1,1))

In [43]:
ss = StandardScaler()
total['humidity'] = ss.fit_transform(total.humidity.values.reshape(-1,1))

In [44]:
total.year = total.year - 2010

In [45]:
total.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
season,17379.0,2.50164,1.106918,1.0,2.0,3.0,3.0,4.0
holiday,17379.0,0.02877036,0.167165,0.0,0.0,0.0,0.0,1.0
workingday,17379.0,0.6827205,0.465431,0.0,0.0,1.0,1.0,1.0
weather,17379.0,1.425283,0.639357,1.0,1.0,1.0,2.0,4.0
temp,17379.0,-3.924973e-17,1.000029,-2.477205,-0.815304,0.015647,0.846597,2.612367
atemp,17379.0,-2.093319e-16,1.000029,-2.768625,-0.829089,0.052518,0.846255,3.050563
humidity,17379.0,3.0255000000000004e-17,1.000029,-3.251166,-0.763143,0.014364,0.791871,1.932215
windspeed,17379.0,12.73654,8.196795,0.0,7.0015,12.998,16.9979,56.9969
year,17379.0,1.502561,0.500008,1.0,1.0,2.0,2.0,2.0
month,17379.0,6.537775,3.438776,1.0,4.0,7.0,10.0,12.0


In [46]:
prediction = []
for i in model:
    i.fit(total.iloc[:10886,:] , target_log)
    prediction.append(i.predict(total.iloc[10886:,:]))

In [47]:
sub.count = (np.exp(prediction) - 1)[0]

In [48]:
sub.to_csv('pred.csv' , index = False)

In [49]:
total

Unnamed: 0,season,holiday,workingday,weather,temp,atemp,humidity,windspeed,year,month,day,hour
0,1,0,0,1,-1.334648,-1.093281,0.947372,0.0000,1,1,1,0
1,1,0,0,1,-1.438516,-1.181732,0.895539,0.0000,1,1,1,1
2,1,0,0,1,-1.438516,-1.181732,0.895539,0.0000,1,1,1,2
3,1,0,0,1,-1.334648,-1.093281,0.636370,0.0000,1,1,1,3
4,1,0,0,1,-1.334648,-1.093281,0.636370,0.0000,1,1,1,4
...,...,...,...,...,...,...,...,...,...,...,...,...
6488,1,0,1,2,-1.230779,-1.269602,-0.141137,11.0014,2,12,31,19
6489,1,0,1,2,-1.230779,-1.269602,-0.141137,11.0014,2,12,31,20
6490,1,0,1,1,-1.230779,-1.269602,-0.141137,11.0014,2,12,31,21
6491,1,0,1,1,-1.230779,-1.181732,-0.348473,8.9981,2,12,31,22


In [50]:
w = ["season", "weather", "humidity", "day", "temp", "atemp"]

In [51]:
wind_data = total.loc[total.windspeed != 0 ,:][w]
wind_data

Unnamed: 0,season,weather,humidity,day,temp,atemp
5,1,2,0.636370,1,-1.334648,-1.269602
10,1,1,0.688203,1,-0.607566,-0.476447
11,1,1,0.947372,1,-0.711435,-0.829089
12,1,1,0.740037,1,-0.399828,-0.300125
13,1,2,0.480868,1,-0.192091,-0.123804
...,...,...,...,...,...,...
6488,1,2,-0.141137,31,-1.230779,-1.269602
6489,1,2,-0.141137,31,-1.230779,-1.269602
6490,1,1,-0.141137,31,-1.230779,-1.269602
6491,1,1,-0.348473,31,-1.230779,-1.181732


In [52]:
wind_target = total.loc[total.windspeed != 0 ,:].windspeed
wind_target

5        6.0032
10      16.9979
11      19.0012
12      19.0012
13      19.9995
         ...   
6488    11.0014
6489    11.0014
6490    11.0014
6491     8.9981
6492     8.9981
Name: windspeed, Length: 15199, dtype: float64

In [53]:
params = {
    'max_depth' : [8,16,24],
    'min_samples_leaf' : [1,6,12],
    'min_samples_split' : [2,8,16]
}

In [54]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
gs = GridSearchCV(RandomForestRegressor(random_state = 0 , n_jobs = -1) , params , n_jobs = -1)
gs.fit(pd.DataFrame(wind_data.values , columns = w) , wind_target)

In [55]:
gs.best_params_

{'max_depth': 16, 'min_samples_leaf': 1, 'min_samples_split': 16}

In [56]:
w

['season', 'weather', 'humidity', 'day', 'temp', 'atemp']

In [57]:
gs.predict(total.loc[total.windspeed == 0 ,:][w])

array([ 7.27214497,  7.21051739,  7.21051739, ..., 10.04374593,
       22.18760293,  8.88538728])

In [58]:
total.loc[total.windspeed == 0, 'windspeed'] = gs.predict(total.loc[total.windspeed == 0, :][w])

In [59]:
total

Unnamed: 0,season,holiday,workingday,weather,temp,atemp,humidity,windspeed,year,month,day,hour
0,1,0,0,1,-1.334648,-1.093281,0.947372,7.272145,1,1,1,0
1,1,0,0,1,-1.438516,-1.181732,0.895539,7.210517,1,1,1,1
2,1,0,0,1,-1.438516,-1.181732,0.895539,7.210517,1,1,1,2
3,1,0,0,1,-1.334648,-1.093281,0.636370,8.647790,1,1,1,3
4,1,0,0,1,-1.334648,-1.093281,0.636370,8.647790,1,1,1,4
...,...,...,...,...,...,...,...,...,...,...,...,...
6488,1,0,1,2,-1.230779,-1.269602,-0.141137,11.001400,2,12,31,19
6489,1,0,1,2,-1.230779,-1.269602,-0.141137,11.001400,2,12,31,20
6490,1,0,1,1,-1.230779,-1.269602,-0.141137,11.001400,2,12,31,21
6491,1,0,1,1,-1.230779,-1.181732,-0.348473,8.998100,2,12,31,22


In [60]:
prediction = []
for i in model:
    i.fit(total.iloc[:10886,:] , target_log)
    prediction.append(i.predict(total.iloc[10886:,:]))

In [61]:
sub['count'] = np.exp(np.mean(prediction , axis = 0)) - 1

In [62]:
np.exp(np.mean(prediction , axis = 0)) - 1

array([ 16.50718613,  18.14506493,  20.10650219, ..., 304.20308256,
       352.40470654, 356.07957608])

In [63]:
sub

Unnamed: 0,datetime,count
0,2011-01-20 00:00:00,16.507186
1,2011-01-20 01:00:00,18.145065
2,2011-01-20 02:00:00,20.106502
3,2011-01-20 03:00:00,22.042592
4,2011-01-20 04:00:00,24.403337
...,...,...
6488,2012-12-31 19:00:00,249.318379
6489,2012-12-31 20:00:00,274.963829
6490,2012-12-31 21:00:00,304.203083
6491,2012-12-31 22:00:00,352.404707


In [64]:
sub.to_csv('pred.csv' , index = False)

In [65]:
total.columns

Index(['season', 'holiday', 'workingday', 'weather', 'temp', 'atemp',
       'humidity', 'windspeed', 'year', 'month', 'day', 'hour'],
      dtype='object')

In [66]:
for i in ['temp', 'atemp','humidity', 'windspeed']:
    ss = StandardScaler()
    total[i] = ss.fit_transform(total[i].values.reshape(-1,1))

In [67]:
total['weather'] = -total['weather']

In [68]:
prediction = []
for i in model:
    i.fit(total.iloc[:10886,:] , target_log)
    prediction.append(i.predict(total.iloc[10886:,:]))

In [69]:
sub['count'] = np.exp(np.mean(prediction , axis = 0)) - 1

In [70]:
sub.to_csv('pred.csv' , index = False)

In [71]:
sub

Unnamed: 0,datetime,count
0,2011-01-20 00:00:00,16.507155
1,2011-01-20 01:00:00,18.145087
2,2011-01-20 02:00:00,20.106527
3,2011-01-20 03:00:00,22.042612
4,2011-01-20 04:00:00,24.403359
...,...,...
6488,2012-12-31 19:00:00,249.318619
6489,2012-12-31 20:00:00,274.964095
6490,2012-12-31 21:00:00,304.203290
6491,2012-12-31 22:00:00,352.405081


In [72]:
rf = RandomForestRegressor()
rf.fit(total.iloc[:10886,:] , target_log)

In [73]:
prediction = rf.predict(total.iloc[10886:,:])

In [74]:
np.exp(prediction) - 1

array([ 11.4849829 ,   4.9386629 ,   3.43979629, ..., 128.45538504,
       111.38942231,  65.45238721])

In [75]:
sub['count'] = np.exp(prediction) - 1
sub

Unnamed: 0,datetime,count
0,2011-01-20 00:00:00,11.484983
1,2011-01-20 01:00:00,4.938663
2,2011-01-20 02:00:00,3.439796
3,2011-01-20 03:00:00,2.889798
4,2011-01-20 04:00:00,2.095629
...,...,...
6488,2012-12-31 19:00:00,302.563010
6489,2012-12-31 20:00:00,192.225043
6490,2012-12-31 21:00:00,128.455385
6491,2012-12-31 22:00:00,111.389422


In [76]:
sub.to_csv('pred.csv' , index = False)

In [77]:
w.append('windspeed')

In [78]:
w

['season', 'weather', 'humidity', 'day', 'temp', 'atemp', 'windspeed']

In [79]:
total.columns

Index(['season', 'holiday', 'workingday', 'weather', 'temp', 'atemp',
       'humidity', 'windspeed', 'year', 'month', 'day', 'hour'],
      dtype='object')

In [80]:
rf.fit(total.iloc[:10886,[3,4,5,6,7]] , target_log)

In [81]:
prediction = rf.predict(total.iloc[10886:,[3,4,5,6,7]])

In [82]:
sub['count'] = np.exp(prediction) - 1
sub

Unnamed: 0,datetime,count
0,2011-01-20 00:00:00,124.073018
1,2011-01-20 01:00:00,28.941885
2,2011-01-20 02:00:00,28.941885
3,2011-01-20 03:00:00,7.482864
4,2011-01-20 04:00:00,7.482864
...,...,...
6488,2012-12-31 19:00:00,70.899378
6489,2012-12-31 20:00:00,70.899378
6490,2012-12-31 21:00:00,82.601215
6491,2012-12-31 22:00:00,22.652148


In [83]:
sub.to_csv('pred.csv' , index = False)

In [84]:
data = total.iloc[:10886,:]

In [85]:
train_input , test_input , train_target , test_target = train_test_split(data , target_log , test_size = 0.2 , random_state = 0)

In [86]:
rf.fit(train_input , train_target)

In [87]:
evaluate_regr(test_target , rf.predict(test_input))

RMSLE : 0.093 , RMSE : 0.335 , MAE : 0.229


In [88]:
evaluate_regr(test_target , lr.predict(test_input))

RMSLE : 0.236 , RMSE : 1.025 , MAE : 0.809


In [89]:
total

Unnamed: 0,season,holiday,workingday,weather,temp,atemp,humidity,windspeed,year,month,day,hour
0,1,0,0,-1,-1.334648,-1.093281,0.947372,-1.010256,1,1,1,0
1,1,0,0,-1,-1.438516,-1.181732,0.895539,-1.019312,1,1,1,1
2,1,0,0,-1,-1.438516,-1.181732,0.895539,-1.019312,1,1,1,2
3,1,0,0,-1,-1.334648,-1.093281,0.636370,-0.808121,1,1,1,3
4,1,0,0,-1,-1.334648,-1.093281,0.636370,-0.808121,1,1,1,4
...,...,...,...,...,...,...,...,...,...,...,...,...
6488,1,0,1,-2,-1.230779,-1.269602,-0.141137,-0.462286,2,12,31,19
6489,1,0,1,-2,-1.230779,-1.269602,-0.141137,-0.462286,2,12,31,20
6490,1,0,1,-1,-1.230779,-1.269602,-0.141137,-0.462286,2,12,31,21
6491,1,0,1,-1,-1.230779,-1.181732,-0.348473,-0.756647,2,12,31,22


In [91]:
train_input

Unnamed: 0,season,holiday,workingday,weather,temp,atemp,humidity,windspeed,year,month,day,hour
3662,3,0,0,-1,1.054335,1.022576,-0.089304,0.125451,1,9,3,12
8648,3,0,1,-1,1.677548,1.727862,-0.555808,0.418828,2,8,2,17
7053,2,0,1,-1,-1.023041,-1.093281,0.118032,0.418828,2,4,12,6
2685,2,0,0,-1,1.158204,1.198898,-0.037470,-0.756647,1,6,19,19
5927,1,0,1,-1,-1.230779,-1.269602,-0.141137,0.125451,2,2,3,4
...,...,...,...,...,...,...,...,...,...,...,...,...
4859,4,0,1,-3,0.223385,0.229421,-0.141137,0.125451,1,11,15,13
3264,3,0,1,-1,1.054335,1.022576,0.118032,0.125451,1,8,5,22
9845,4,0,0,-1,0.846597,0.846255,-1.229647,2.035353,2,10,14,14
10799,4,0,0,-2,-0.711435,-0.740638,1.258375,-0.756647,2,12,16,9


In [96]:
train_target

3662     5.552960
8648     6.763885
7053     4.574711
2685     5.743003
5927     1.098612
           ...   
4859     5.036953
3264     5.241747
9845     6.426488
10799    4.890349
2732     5.950643
Name: count, Length: 8708, dtype: float64

In [93]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
model = Sequential()
model.add(Dense(12,  input_dim=12, activation='relu'))
model.add(Dense(8,  activation='relu'))
model.add(Dense(3, activation='softmax'))
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_6 (Dense)             (None, 12)                156       
                                                                 
 dense_7 (Dense)             (None, 8)                 104       
                                                                 
 dense_8 (Dense)             (None, 3)                 27        
                                                                 
Total params: 287 (1.12 KB)
Trainable params: 287 (1.12 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [94]:
# 모델 컴파일
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# 모델 실행
history=model.fit(train_input , train_target , epochs=50)

Epoch 1/50


ValueError: in user code:

    File "C:\Anaconda3\lib\site-packages\keras\src\engine\training.py", line 1338, in train_function  *
        return step_function(self, iterator)
    File "C:\Anaconda3\lib\site-packages\keras\src\engine\training.py", line 1322, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Anaconda3\lib\site-packages\keras\src\engine\training.py", line 1303, in run_step  **
        outputs = model.train_step(data)
    File "C:\Anaconda3\lib\site-packages\keras\src\engine\training.py", line 1081, in train_step
        loss = self.compute_loss(x, y, y_pred, sample_weight)
    File "C:\Anaconda3\lib\site-packages\keras\src\engine\training.py", line 1139, in compute_loss
        return self.compiled_loss(
    File "C:\Anaconda3\lib\site-packages\keras\src\engine\compile_utils.py", line 265, in __call__
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    File "C:\Anaconda3\lib\site-packages\keras\src\losses.py", line 142, in __call__
        losses = call_fn(y_true, y_pred)
    File "C:\Anaconda3\lib\site-packages\keras\src\losses.py", line 268, in call  **
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "C:\Anaconda3\lib\site-packages\keras\src\losses.py", line 2122, in categorical_crossentropy
        return backend.categorical_crossentropy(
    File "C:\Anaconda3\lib\site-packages\keras\src\backend.py", line 5560, in categorical_crossentropy
        target.shape.assert_is_compatible_with(output.shape)

    ValueError: Shapes (None, 1) and (None, 3) are incompatible
