In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

train=pd.read_csv("../bike-sharing-demand/train.csv", parse_dates=['datetime']) #train.csv, datetime컬럼
test=pd.read_csv("../bike-sharing-demand/test.csv", parse_dates=['datetime'])#train.csv, datetime컬럼
train.shape #(10886, 12)
test.shape #(6493, 9)
train['year']=train['datetime'].dt.year
train['month']=train['datetime'].dt.month
train['hour']=train['datetime'].dt.hour
train['dayofweek']=train['datetime'].dt.dayofweek
test['year']=test['datetime'].dt.year
test['month']=test['datetime'].dt.month
test['hour']=test['datetime'].dt.hour
test['dayofweek']=test['datetime'].dt.dayofweek

In [2]:
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10886 entries, 0 to 10885
Data columns (total 16 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   datetime    10886 non-null  datetime64[ns]
 1   season      10886 non-null  int64         
 2   holiday     10886 non-null  int64         
 3   workingday  10886 non-null  int64         
 4   weather     10886 non-null  int64         
 5   temp        10886 non-null  float64       
 6   atemp       10886 non-null  float64       
 7   humidity    10886 non-null  int64         
 8   windspeed   10886 non-null  float64       
 9   casual      10886 non-null  int64         
 10  registered  10886 non-null  int64         
 11  count       10886 non-null  int64         
 12  year        10886 non-null  int64         
 13  month       10886 non-null  int64         
 14  hour        10886 non-null  int64         
 15  dayofweek   10886 non-null  int64         
dtypes: datetime64[ns](1), 

In [3]:
cfn=['season', 'holiday', 'workingday', 'weather', 'dayofweek', 'month', 'year', 'hour']
for v in cfn:
    train[v]=train[v].astype('category')
    test[v]=test[v].astype('category')

In [4]:
fn=['season', 'holiday', 'workingday', 'weather', 'dayofweek', 'month', 'year', 'hour', 'temp', 'humidity']
xTrain = train[fn]
yTrain = train['count']
xTest = test[fn]

In [5]:
def rmsle(av,pv):
    av = np.array(av)
    pv = np.array(pv)
    logav = np.log1p(av) # 정규분포로 만들기 위해 로그를 씌워줌
    logpv = np.log1p(pv)
    diff = logpv - logav
    ds = np.square(diff)
    md = ds.mean()
    score = np.sqrt(md)
    return score

In [6]:
# 선형회귀모델
from sklearn.linear_model import *

#LinearRegression, Lasso, Ridge
from sklearn.model_selection import GridSearchCV
from sklearn import metrics

In [10]:
# 선형회귀-모델-예측
model = LinearRegression()
ytrain = np.log1p(yTrain)
model.fit(xTrain,ytrain)
predictions = model.predict(xTrain)

In [11]:
# RMSLE 점수 (값이 작을수록 정확하다)
rmsle(np.exp(ytrain)+1,np.exp(predictions)+1)

0.9498107175341887

In [26]:
## 로그함수 탐구
x = np.array([0.00001,1,2,4,10,100])
np.exp(x) # 밑이 자연상수 e인 지수함수 y=e의 x제곱형태로
np.log(x) # 밑이 e
np.log10(x) # 밑이 10
np.log2(x) # 밑이 2
np.log1p(0) #1을 더한 값에 로그를 취한다.

In [24]:
# 신경망 : 퍼셉트론 : AND게이트

def AND(x1,x2):
    w1,w2,theta = 0.5 , 0.5 , 0.7
    res = w1*x1 + w2*x2
    if res <= theta:
        return 0
    elif res > theta:
        return 1

AND(0,0)
AND(0,1)
AND(1,0)
AND(1,1)

1

In [14]:
# 신경망 : 퍼셉트론 : OR게이트

def OR(x1,x2):
    w1,w2,theta = 1, 1, 0.9
    res = w1*x1 + w2*x2
    if res <= theta:
        return 0
    elif res > theta:
        return 1

print(OR(0,0))
print(OR(0,1))
print(OR(1,0))
print(OR(1,1))

0
1
1
1


In [19]:
# 신경망 : 퍼셉트론 : AND게이트

def AND2(x1,x2):
    x = np.array([x1,x2])
    w = np.array([0.5,0.5])
    b = -0.4
    t = np.sum(x*w)+b
    if t<=0:
        return 0
    else:
        return 1

for data in [(0,0),(0,1),(1,0),(1,1)]:
    y = AND2(data[0],data[1])
    print(y)

0
1
1
1


In [22]:
# 신경망 : 퍼셉트론 : NAND게이트

def NAND2(x1,x2): # NOT AND ( 입력값이 모두 1일때만 0, 나머지는 1 )
    x = np.array([x1,x2])
    w = np.array([-0.5,-0.5])
    b = 0.7
    t = np.sum(x*w)+b
    if t<=0:
        return 0
    else:
        return 1

for data in [(0,0),(0,1),(1,0),(1,1)]:
    y = NAND2(data[0],data[1])
    print(y)

1
1
1
0


In [26]:
# 멀티 레이어 퍼셉트론 : 퍼셉트론을 여러개 사용해서 복잡한 문제를 해결할 수 있다.

def XOR(x1,x2):
    x = OR(x1,x2)
    y = NAND2(x1,x2)
    z = AND(x,y)
    return z

for data in [(0,0),(0,1),(1,0),(1,1)]:
    z = XOR(data[0],data[1])
    print(z)

0
1
1
0
