## BigQueryからデータインポート

In [None]:
# Authenticate the Colab user before any Google Cloud client is created.
from google.colab import auth
auth.authenticate_user()

In [None]:
from google.cloud import bigquery

# Placeholder: replace with your own GCP project ID. It is the single source of
# truth for both the client and the table reference below.
project_id = "自分のプロジェクト名"

client = bigquery.Client(project=project_id)

# Build the fully-qualified table name from project_id so the project only has
# to be edited in one place and cannot drift from the client's project.
query = f"""
SELECT
  *
FROM
  `{project_id}.myData.pitchTypeData`
"""

# Run the query and materialize the whole result as a pandas DataFrame.
df = client.query(query).to_dataframe()
df

## 主成分分析

from sklearn.decomposition import PCA
import numpy as np

# Feed every column except the pitch-type label and the pitcher name to PCA
# and keep the first two components for plotting.
# NOTE(review): the features are not standardized before PCA; if the columns
# are on different scales the components may be dominated by the largest one.
non_feature_columns = ["pitchTypeDescription", "pitcherName"]
pca_input = df.drop(columns=non_feature_columns)

pca = PCA(n_components=2)
transformed = pca.fit_transform(pca_input)

In [None]:
import matplotlib.pyplot as plt

# Draw one scatter series per pitch type in the 2-D PCA space. Each series is
# labelled and a legend is added so the classes can actually be told apart.
for label in np.unique(df.pitchTypeDescription):
    # Compute the row mask once and reuse it for both coordinates.
    mask = (df.pitchTypeDescription == label).to_numpy()
    plt.scatter(transformed[mask, 0], transformed[mask, 1], label=label)

plt.xlabel("PC1")
plt.ylabel("PC2")
plt.legend()
plt.show()

In [None]:
# Plot only the rows labelled "Changeup" in the 2-D PCA space.
is_changeup = df.pitchTypeDescription == "Changeup"
plt.scatter(transformed[is_changeup, 0], transformed[is_changeup, 1])

## モデル構築

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report

In [None]:
# Hold out 30% of the rows for evaluation. The seed is fixed so that the split,
# and therefore the reported score, is reproducible across re-runs.
# NOTE(review): consider stratify=df["pitchTypeDescription"] if every pitch
# type has at least two rows — not verifiable from here.
df_train, df_test = train_test_split(df, test_size=0.3, random_state=42)
df

In [None]:
# Target is the pitch type; the inputs are every remaining column except the
# pitcher's name.
target_column = "pitchTypeDescription"
excluded_columns = [target_column, "pitcherName"]

Y_train, Y_test = df_train[target_column], df_test[target_column]

X_train = df_train.drop(columns=excluded_columns)
X_test = df_test.drop(columns=excluded_columns)

In [None]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# MLPs are sensitive to feature scaling, so standardize the inputs inside a
# pipeline: the scaler is fit on the training rows only and is re-applied
# automatically by score()/predict(). The seed makes weight initialization,
# and hence the score, reproducible.
cls = make_pipeline(StandardScaler(), MLPClassifier(random_state=42))
cls.fit(X_train, Y_train)

# Mean accuracy on the held-out rows.
print(cls.score(X_test, Y_test))

In [None]:
Y_pred = cls.predict(X_test)

# confusion_matrix orders its rows/columns by the sorted union of the labels
# that occur in Y_test or Y_pred. Compute that list once and use it for both
# the matrix and the tick labels, so the axes cannot be mislabeled when a class
# appears in only one of the two.
class_labels = np.union1d(Y_test, Y_pred)
cm = confusion_matrix(Y_test, Y_pred, labels=class_labels)

plt.figure(figsize=(10, 7))
sns.heatmap(cm, annot=True, fmt="d", xticklabels=class_labels, yticklabels=class_labels)
plt.xlabel("Predict label")
plt.ylabel("Actual label")
plt.show()

In [None]:
# Per-class precision / recall / F1 on the held-out rows.
report = classification_report(Y_test, Y_pred)
print(report)

## 再学習

In [None]:
# Reload the table for the retraining experiment. The table reference is built
# from project_id (defined alongside the BigQuery client) so the project name
# is not hard-coded a second time.
query = f"""
SELECT
  *
FROM
  `{project_id}.myData.pitchTypeData`
"""

df = client.query(query).to_dataframe()

In [None]:
# Hold out 30% of the rows. A fixed seed keeps the split reproducible, so score
# changes reflect the model/feature change rather than a different random split.
df_train, df_test = train_test_split(df, test_size=0.3, random_state=42)

# Target is the pitch type; this time every other column, including the
# pitcher's name, is kept as an input.
target_column = "pitchTypeDescription"

Y_train, Y_test = df_train[target_column], df_test[target_column]

X_train = df_train.drop(columns=target_column)
X_test = df_test.drop(columns=target_column)

In [None]:
from sklearn.preprocessing import LabelEncoder

# Fit the encoder on the full table so that every pitcher name seen in either
# split has an integer code, then replace the name column in both feature sets.
# NOTE(review): LabelEncoder is documented as a target encoder; the integer
# codes impose an arbitrary order on pitchers. One-hot encoding may suit this
# feature better — confirm before relying on the result.
encorder = LabelEncoder()
encorder.fit(df["pitcherName"])

for feature_frame in (X_train, X_test):
    feature_frame["pitcherName"] = encorder.transform(feature_frame["pitcherName"])


In [None]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# MLPs are sensitive to feature scaling, so standardize the inputs inside a
# pipeline: the scaler is fit on the training rows only and is re-applied
# automatically by score()/predict(). The seed makes weight initialization,
# and hence the score, reproducible.
cls = make_pipeline(StandardScaler(), MLPClassifier(random_state=42))
cls.fit(X_train, Y_train)

# Mean accuracy on the held-out rows.
print(cls.score(X_test, Y_test))

In [None]:
Y_pred = cls.predict(X_test)

# confusion_matrix orders its rows/columns by the sorted union of the labels
# that occur in Y_test or Y_pred. Compute that list once and use it for both
# the matrix and the tick labels, so the axes cannot be mislabeled when a class
# appears in only one of the two.
class_labels = np.union1d(Y_test, Y_pred)
cm = confusion_matrix(Y_test, Y_pred, labels=class_labels)

plt.figure(figsize=(10, 7))
sns.heatmap(cm, annot=True, fmt="d", xticklabels=class_labels, yticklabels=class_labels)
plt.xlabel("Predict label")
plt.ylabel("Actual label")
plt.show()

In [None]:
# Per-class precision / recall / F1 on the held-out rows.
report = classification_report(Y_test, Y_pred)
print(report)