In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline
from sklearn.linear_model import LinearRegression as LR

In [2]:
root_dir = '/Users/kessapassa/OneDrive/research_log/logs/'

# csvを読み込み
### * VehicleとPedestrianの前には空白が入っているので注意

In [33]:
main_csv = pd.read_csv(root_dir+'2_8_seed123.csv', encoding='Shift_JIS', dtype = None, delimiter = ",")
mobile = main_csv.copy()
census = main_csv[main_csv['road'].str.contains('census')]
vehicles = main_csv[main_csv['type'] == ' Vehicle']
pedestrians = main_csv[main_csv['type'] == ' Pedestrian']

In [35]:
mobile.head()

Unnamed: 0,id,type,time,road,x,y,area
0,62378,Vehicle,3600,road14525,-9553.58,-8091.56,6
1,57801,Vehicle,3600,road768,-1987.29,-983.759,27
2,56165,Vehicle,3600,国道157号(census)(14),-1920.13,-2695.35,21
3,48112,Pedestrian,3600,,-3383.34,-234.17,33
4,52679,Vehicle,3600,向粟崎安江町線(census)(3),-2042.5,481.471,33


In [36]:
census.head()

Unnamed: 0,id,type,time,road,x,y,area
2,56165,Vehicle,3600,国道157号(census)(14),-1920.13,-2695.35,21
4,52679,Vehicle,3600,向粟崎安江町線(census)(3),-2042.5,481.471,33
10,39363,Vehicle,3600,向粟崎安江町線(census)(3),-2036.49,461.377,33
12,46376,Vehicle,3600,国道157号(census)(9),-4554.54,-4833.92,14
14,43480,Vehicle,3600,国道157号(census)(14),-1744.93,-2410.66,21


In [37]:
vehicles.head()

Unnamed: 0,id,type,time,road,x,y,area
0,62378,Vehicle,3600,road14525,-9553.58,-8091.56,6
1,57801,Vehicle,3600,road768,-1987.29,-983.759,27
2,56165,Vehicle,3600,国道157号(census)(14),-1920.13,-2695.35,21
4,52679,Vehicle,3600,向粟崎安江町線(census)(3),-2042.5,481.471,33
5,63606,Vehicle,3600,road2609,-1814.24,-909.539,27


In [38]:
pedestrians.head()

Unnamed: 0,id,type,time,road,x,y,area
3,48112,Pedestrian,3600,,-3383.34,-234.17,33
6,59079,Pedestrian,3600,,-1930.42,-808.375,27
8,44242,Pedestrian,3600,,-3377.51,-3210.72,21
9,58113,Pedestrian,3600,,-3459.28,-3319.91,21
11,41267,Pedestrian,3600,,-2479.28,-3197.44,21


# typeは文字列なのでダミー変数化

In [4]:
Y = mobile['area']
X = mobile.drop(['road', 'x', 'y', 'area'], axis=1)
X['type'] = pd.get_dummies(X['type'])

# 出力ファイルの名前

In [5]:
output_name = './Output/outpu.csv'

In [6]:
X.head()

Unnamed: 0,id,type,time
0,62378,0,3600
1,57801,0,3600
2,56165,0,3600
3,48112,1,3600
4,52679,0,3600


In [7]:
Y.head()

0     6
1    27
2    21
3    33
4    33
Name: area, dtype: int64

In [8]:
model = LR()

In [9]:
model.fit(X, Y)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

# 傾き

In [10]:
model.coef_

array([-1.51952143e-05, -3.45997838e-01, -8.91430679e-05])

# 切片

In [11]:
model.intercept_

25.15451902446761

# 相関関係

In [12]:
model.score(X, Y)

0.002310170165308989

# テスト

In [17]:
testX = vehicles.drop(['road', 'x', 'y', 'area'], axis=1)
testX['type'] = pd.get_dummies(testX['type'])

In [18]:
testX.head()

Unnamed: 0,id,type,time
0,62378,1,3600
1,57801,1,3600
2,56165,1,3600
4,52679,1,3600
5,63606,1,3600


In [21]:
pred = model.predict(testX)
pred

array([23.53975906, 23.60930756, 23.63416693, 23.68713745, 23.52109934,
       23.53162962, 23.88947692, 23.78291288, 23.63153816, 23.82691822,
       23.79550972, 23.76521046, 23.75303909, 23.70800048, 23.97016351,
       23.7184092 , 23.55270539, 23.67438866, 23.85943598, 23.85056198,
       23.8615937 , 23.7587221 , 23.63399978, 23.84910324, 23.58877883,
       23.67005803, 23.52040036, 23.81252836, 23.49813937, 23.83518442,
       23.92029282, 23.66018114, 23.74869326, 23.56299255, 23.70105626,
       23.82819462, 23.80989958, 23.77929642, 23.86233827, 23.91644843,
       23.93216028, 23.85870661, 23.57817257, 23.81055298, 23.67057466,
       23.87464639, 23.92618856, 23.71266541, 23.69794124, 23.65079049,
       23.71657058, 23.56200486, 23.62081034, 23.52701028, 23.82141756,
       23.87142501, 23.7771539 , 23.68599781, 23.48941732, 23.72743516,
       23.65515152, 23.76770247, 23.66069777, 23.63878628, 23.49894472,
       23.80186132, 23.88810935, 23.6507601 , 23.77274728, 23.89

In [22]:
model.get_params()

{'copy_X': True, 'fit_intercept': True, 'n_jobs': 1, 'normalize': False}