In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Step 1: データの読み込み
status_data = pd.read_csv('/home-asustor/teramoto/SIGNATE/SOTA_Challenge/Share_Cycle/data/status.csv')
weather_data = pd.read_csv('/home-asustor/teramoto/SIGNATE/SOTA_Challenge/Share_Cycle/data/weather.csv')

# 天気データの日付と時間を抽出
weather_data['date'] = pd.to_datetime(weather_data['date'])
weather_data['year'] = weather_data['date'].dt.year
weather_data['month'] = weather_data['date'].dt.month
weather_data['day'] = weather_data['date'].dt.day

# Step 2: データのマージ
status_data = pd.merge(status_data, weather_data, on=['year', 'month', 'day'], how='left')

# Step 3: データのフィルタリング（晴れの日のデータ）
sunny_data = status_data[status_data['events'].isna()]

# Step 4: トレーニングデータの準備 (sunny_data かつ predict == 0)
train_data = sunny_data[sunny_data['predict'] == 0]
X_train = train_data[['max_temperature', 'mean_humidity', 'wind_dir_degrees', 'max_wind_Speed']]
y_train = train_data['bikes_available']

# 欠損値の確認
print(f"Missing values in y_train: {y_train.isna().sum()}")

# 欠損値を0で埋める
y_train.fillna(0, inplace=True)

# Step 5: テストデータの準備 (predict == 1のデータを全体から取得)
test_data = status_data[status_data['predict'] == 1]
X_test = test_data[['max_temperature', 'mean_humidity', 'wind_dir_degrees', 'max_wind_Speed']]

# 欠損値の処理
X_train.fillna(0, inplace=True)
X_test.fillna(0, inplace=True)

# Step 6: モデルの訓練
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Step 7: 予測の実行
test_data['predicted_bikes_available'] = model.predict(X_test)
test_data['predicted_bikes_available'] = test_data['predicted_bikes_available'].round().astype(int)

# Step 8: 結果の表示と保存
print(test_data[['id', 'predicted_bikes_available']].head())

# 予測結果をファイルに保存
output_data = test_data[['id', 'predicted_bikes_available']]
output_data.to_csv('/home-asustor/teramoto/SIGNATE/SOTA_Challenge/Share_Cycle/data/predicted_bikes_available.csv', index=False, header=False)


Missing values in y_train: 194501


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  y_train.fillna(0, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train.fillna(0, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test.fillna(0, inplace=True)


        id  predicted_bikes_available
8761  8761                          9
8762  8762                          9
8763  8763                          9
8764  8764                          9
8765  8765                          9


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_data['predicted_bikes_available'] = model.predict(X_test)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_data['predicted_bikes_available'] = test_data['predicted_bikes_available'].round().astype(int)
