In [None]:
!pip install japanize-matplotlib

In [None]:
!pip install gspread

In [None]:
import pandas as pd
import numpy as np
%matplotlib inline
import japanize_matplotlib
from sklearn.model_selection import train_test_split
from matplotlib import pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Apply japanize_matplotlib's settings so that Japanese text in figures is rendered
japanize_matplotlib.japanize()

In [None]:
# Mount Google Drive in the Colab runtime and move into the working directory
from google.colab import drive
drive.mount('/content/drive')
# NOTE(review): "myPath" looks like a placeholder — point it at the real project folder
%cd "myPath"

# データのダウンロード

In [None]:
# Column names given to the sheet's columns when it is loaded, in positional order
cols = [
    'timestamp', 'mail_adress', 'sex', 'friends_in_LINE', 'GPA', 'start_day',
    'number_of_companies',      # number of companies applied to
    'number_of_offer',          # number of job offers received
    'best_offer',               # whether an offer came from the first-choice group
    'Cooperativeness',          # cooperativeness
    'Aggressiveness',           # proactiveness
    'Action',                   # ability to take action
    'Communication',            # communication skills
    'Problem-solving ability', 'Challange', 'Job_hunting_action',
]

In [None]:
# Libraries used for Colab authentication and Google Sheets access
from google.colab import auth
from google.auth import default
from oauth2client.client import GoogleCredentials
import gspread
import pandas as pd
from sklearn.linear_model import LinearRegression

# Authenticate the Colab user and build an authorized gspread client
auth.authenticate_user()
creds, _ = default()
gc = gspread.authorize(creds)

# URL of the spreadsheet to read
# NOTE(review): the value looks like a placeholder ID, yet it is passed to
# open_by_url() below — confirm a full spreadsheet URL is supplied here
url = 'mySpreadSheetId'

# Load the first worksheet into pandas. The first returned row is skipped
# (presumably the sheet's own header row) and the names in `cols` are used instead.
worksheet = gc.open_by_url(url).get_worksheet(0)
data = pd.DataFrame(worksheet.get_all_values()[1:], columns = cols)

In [None]:
#はいを1に、いいえ, を0に変換, 性別を数値に変換
# Encode the yes/no answer ('はい'/'いいえ') and the gender label as 1/0.
# The result is assigned back to the column instead of calling
# replace(inplace=True) on `data[...]`: that chained in-place form acts on an
# intermediate Series and leaves `data` unchanged under pandas Copy-on-Write.
# infer_objects() gives a fully replaced column an integer dtype; values that
# are not in the mapping are kept as they are, and re-running the cell is a no-op.
data['best_offer'] = data['best_offer'].replace({'はい':1, 'いいえ':0}).infer_objects()
data['sex'] = data['sex'].replace({'男性':1, '女性':0}).infer_objects()
data.head()

In [None]:
# 使用カラムを限定
# Restrict the analysis to the columns used below
use_cols = [
    'friends_in_LINE',
    'GPA',
    'start_day',
    'number_of_companies',
    'number_of_offer',
    'best_offer',
]

# .copy() makes `df` an independent frame: columns are added to it in later
# cells, and those writes must not be ambiguous writes to a selection of `data`.
df = data[use_cols].copy()

In [None]:
# Preview the first rows of the selected columns
df.head()

Unnamed: 0,friends_in_LINE,GPA,start_day,number_of_companies,number_of_offer,best_offer
0,484,3.5,2021/02/01,12,4,1
1,189,1.5,2022/03/20,2,0,0
2,300,3.2,2021/05/01,6,4,1
3,183,3.0,2021/05/01,10,5,1
4,436,3.3,2021/03/01,10,6,1


# start_dayの数値化

In [None]:
import datetime  # not used in this cell any more; kept in case later cells rely on the name

# Parse the 'YYYY/MM/DD' strings in one vectorized pass. The previous row loop
# read values with chained indexing (df['start_day'][i]), only worked with a
# default 0..n-1 index, and produced float year/month columns as a side effect
# of enlarging the frame one cell at a time.
start_dates = pd.to_datetime(df['start_day'], format='%Y/%m/%d')

# assign() returns a new frame, so nothing is written into a selection of
# `data`; start_year / start_month are now integer columns.
df = df.assign(
    start_year=start_dates.dt.year,
    start_month=start_dates.dt.month,
)

In [None]:
# Preview df again to check the start_year / start_month columns
df.head()

Unnamed: 0,friends_in_LINE,GPA,start_day,number_of_companies,number_of_offer,best_offer,start_year,start_month
0,484,3.5,2021/02/01,12,4,1,2021.0,2.0
1,189,1.5,2022/03/20,2,0,0,2022.0,3.0
2,300,3.2,2021/05/01,6,4,1,2021.0,5.0
3,183,3.0,2021/05/01,10,5,1,2021.0,5.0
4,436,3.3,2021/03/01,10,6,1,2021.0,3.0


# データの可視化

In [None]:
# Disabled correlation heatmap: the whole cell is a single string literal, so
# none of the plotting code below runs (the notebook only echoes the text).
# NOTE(review): either restore it as real code or delete the cell.
"""
sns.set(font="IPAexGothic", style='whitegrid')
cor = df.corr()

# Set the width and height of the figure
plt.figure(figsize=(10,5))

# Add title
plt.title("Corr_of_sample_data")

sns.heatmap(data=cor, annot=True, cmap= sns.color_palette('coolwarm', 10), vmax=1, vmin=-1, center=0)

# Add label for horizontal axis
plt.tight_layout()
#plt.savefig('Corr_of_sample_data.png')
"""

# データの前処理

In [None]:
# Standardize the explanatory variables with StandardScaler
from sklearn.preprocessing import StandardScaler
scaled_cols = ['friends_in_LINE', 'GPA', 'start_year', 'start_month']
standard_sc = StandardScaler()
# Cast explicitly, as the regression cell does with astype(float): the sheet
# values presumably arrive as strings, so do not rely on an implicit conversion.
x = standard_sc.fit_transform(df[scaled_cols].astype(float))

# Reuse df's index: the label column below is assigned by index alignment, and
# a fresh 0..n-1 index would mismatch it as soon as df has any other index
# (for example after rows are filtered out).
scaled_data = pd.DataFrame(x, columns=scaled_cols, index=df.index)
scaled_data['best_offer'] = df['best_offer']

In [None]:
# Preview the standardized features together with the best_offer label
scaled_data.head()

Unnamed: 0,friends_in_LINE,GPA,start_year,start_month,best_offer
0,0.711586,1.1078,-0.185695,-0.904663,1
1,-0.698837,-3.109712,1.671258,-0.565414,0
2,-0.168135,0.475173,-0.185695,0.113083,1
3,-0.727523,0.053422,-0.185695,0.113083,1
4,0.482094,0.686049,-0.185695,-0.565414,1


# 重回帰分析

In [None]:
# Prepare the inputs for the multiple regression:
# target = number of offers, features = every scaled column except the label
y_reg = df['number_of_offer']
X = scaled_data.drop(columns=['best_offer'])

In [None]:
import statsmodels.api as sm

# sm.OLS does not add an intercept by itself. The features are standardized
# but the target is not centred, so fitting without a constant forces the fit
# through the origin (the earlier summary reported "R-squared (uncentered)").
# add_constant() supplies the intercept, as the Logit model in this notebook does.
model = sm.OLS(y_reg.astype(float), sm.add_constant(X.astype(float)))  # build the model

result = model.fit()      # fit the model
print(result.summary())        # show the results

                                 OLS Regression Results                                
Dep. Variable:        number_of_offer   R-squared (uncentered):                   0.130
Model:                            OLS   Adj. R-squared (uncentered):             -0.004
Method:                 Least Squares   F-statistic:                             0.9675
Date:                Fri, 14 Oct 2022   Prob (F-statistic):                       0.442
Time:                        14:07:44   Log-Likelihood:                         -81.306
No. Observations:                  30   AIC:                                      170.6
Df Residuals:                      26   BIC:                                      176.2
Df Model:                           4                                                  
Covariance Type:            nonrobust                                                  
                      coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------

# ロジスティック回帰

In [None]:
# Logistic regression of best_offer on the scaled features
from sklearn.linear_model import LogisticRegression
import statsmodels.api as sm

y_cla = df['best_offer']
logit_design = sm.add_constant(X)  # feature matrix with an intercept column added

model = sm.Logit(y_cla, logit_design)
result = model.fit(disp=0)  # disp=0: do not print the optimizer's convergence output
print(result.summary())

                           Logit Regression Results                           
Dep. Variable:             best_offer   No. Observations:                   30
Model:                          Logit   Df Residuals:                       25
Method:                           MLE   Df Model:                            4
Date:                Fri, 14 Oct 2022   Pseudo R-squ.:                  0.2849
Time:                        13:59:51   Log-Likelihood:                -6.9739
converged:                       True   LL-Null:                       -9.7525
Covariance Type:            nonrobust   LLR p-value:                    0.2347
                      coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------
const               3.2343      1.126      2.873      0.004       1.028       5.441
friends_in_LINE     1.5020      1.764      0.851      0.395      -1.956       4.960
GPA                 1.3228      