In [33]:
import pandas as pd
import matplotlib.pyplot as plt
from torch import nn
from tqdm.auto import tqdm
from torch.cuda import is_available
import torch
import torch.nn as nn


In [34]:
data = pd.read_csv("data/train.csv")
data.head()

Unnamed: 0,id,day,pressure,maxtemp,temparature,mintemp,dewpoint,humidity,cloud,sunshine,winddirection,windspeed,rainfall
0,0,1,1017.4,21.2,20.6,19.9,19.4,87.0,88.0,1.1,60.0,17.2,1
1,1,2,1019.5,16.2,16.9,15.8,15.4,95.0,91.0,0.0,50.0,21.9,1
2,2,3,1024.1,19.4,16.1,14.6,9.3,75.0,47.0,8.3,70.0,18.1,1
3,3,4,1013.4,18.1,17.8,16.9,16.8,95.0,95.0,0.0,60.0,35.6,1
4,4,5,1021.8,21.3,18.4,15.2,9.6,52.0,45.0,3.6,40.0,24.8,0


In [35]:
train_labels = data['rainfall']
train_data = data.drop(['rainfall'], axis=1)
train_labels.shape, train_data.shape

((2190,), (2190, 12))

In [36]:
device = "cuda" if is_available() else "cpu"
device

'cuda'

In [37]:
train_labels = data['rainfall']
train_data = data.drop(['rainfall'], axis=1)

train_data = train_data.drop(['id'], axis=1)

train_data_t = torch.tensor(train_data.to_numpy(), dtype=torch.float32).to(device)
train_labels_t = torch.tensor(train_labels.to_numpy(), dtype=torch.float32).to(device)


In [38]:
torch.manual_seed(42)
class WeatherNN(nn.Module):
    def __init__(self, IN):
        super().__init__()
        self.layers_block = nn.Sequential(
            nn.Linear(IN, 1024),
            nn.BatchNorm1d(1024),
            nn.LeakyReLU(0.1),
            nn.Dropout(0.2),

            nn.Linear(1024, 512),
            nn.BatchNorm1d(512),
            nn.LeakyReLU(0.1),
            nn.Dropout(0.2),

            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.LeakyReLU(0.1),
            nn.Dropout(0.1),

            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.LeakyReLU(0.1),

            nn.Linear(128, 1)   # выход без активации
        )

    def forward(self, x):
        return self.layers_block(x)


model = WeatherNN(IN=11).to(device)
loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)

epochs = 10
batch_size = 365 



for epoch in tqdm(range(epochs)):
    

    for i in range(0, len(train_data_t), batch_size):
        model.train()
        x_batch = train_data_t[i:i+batch_size]
        y_batch = train_labels_t[i:i+batch_size].unsqueeze(1)

        logits = model(x_batch)                     
        loss = loss_fn(logits, y_batch)             

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        model.eval()
        with torch.inference_mode():
            probs = torch.sigmoid(logits)           
            preds = (probs > 0.5).float()           
            acc = (preds == y_batch).float().mean()

    if (epoch+1) % 5 == 0:
        print(f"Epoch {epoch+1:4d} | Loss: {loss.item():.4f} | Acc: {acc.item():.4f}")


100%|██████████| 10/10 [00:00<00:00, 48.74it/s]

Epoch    5 | Loss: 0.3224 | Acc: 0.8658
Epoch   10 | Loss: 0.3089 | Acc: 0.8740





In [39]:
test_data = pd.read_csv("data/test.csv")
test_data

Unnamed: 0,id,day,pressure,maxtemp,temparature,mintemp,dewpoint,humidity,cloud,sunshine,winddirection,windspeed
0,2190,1,1019.5,17.5,15.8,12.7,14.9,96.0,99.0,0.0,50.0,24.3
1,2191,2,1016.5,17.5,16.5,15.8,15.1,97.0,99.0,0.0,50.0,35.3
2,2192,3,1023.9,11.2,10.4,9.4,8.9,86.0,96.0,0.0,40.0,16.9
3,2193,4,1022.9,20.6,17.3,15.2,9.5,75.0,45.0,7.1,20.0,50.6
4,2194,5,1022.2,16.1,13.8,6.4,4.3,68.0,49.0,9.2,20.0,19.4
...,...,...,...,...,...,...,...,...,...,...,...,...
725,2915,361,1020.8,18.2,17.6,16.1,13.7,96.0,95.0,0.0,20.0,34.3
726,2916,362,1011.7,23.2,18.1,16.0,16.0,78.0,80.0,1.6,40.0,25.2
727,2917,363,1022.7,21.0,18.5,17.0,15.5,92.0,96.0,0.0,50.0,21.9
728,2918,364,1014.4,21.0,20.0,19.7,19.8,94.0,93.0,0.0,50.0,39.5


In [40]:
test_data[test_data.isnull().any(axis=1)]

Unnamed: 0,id,day,pressure,maxtemp,temparature,mintemp,dewpoint,humidity,cloud,sunshine,winddirection,windspeed
517,2707,153,1007.8,32.9,30.6,28.9,22.0,65.0,75.0,8.2,,17.2


In [41]:
test_data = test_data.fillna(test_data.mean())
test_data[test_data.isnull().any(axis=1)]

Unnamed: 0,id,day,pressure,maxtemp,temparature,mintemp,dewpoint,humidity,cloud,sunshine,winddirection,windspeed


In [42]:
test_data = test_data.drop(['id'], axis=1)
test_data

Unnamed: 0,day,pressure,maxtemp,temparature,mintemp,dewpoint,humidity,cloud,sunshine,winddirection,windspeed
0,1,1019.5,17.5,15.8,12.7,14.9,96.0,99.0,0.0,50.0,24.3
1,2,1016.5,17.5,16.5,15.8,15.1,97.0,99.0,0.0,50.0,35.3
2,3,1023.9,11.2,10.4,9.4,8.9,86.0,96.0,0.0,40.0,16.9
3,4,1022.9,20.6,17.3,15.2,9.5,75.0,45.0,7.1,20.0,50.6
4,5,1022.2,16.1,13.8,6.4,4.3,68.0,49.0,9.2,20.0,19.4
...,...,...,...,...,...,...,...,...,...,...,...
725,361,1020.8,18.2,17.6,16.1,13.7,96.0,95.0,0.0,20.0,34.3
726,362,1011.7,23.2,18.1,16.0,16.0,78.0,80.0,1.6,40.0,25.2
727,363,1022.7,21.0,18.5,17.0,15.5,92.0,96.0,0.0,50.0,21.9
728,364,1014.4,21.0,20.0,19.7,19.8,94.0,93.0,0.0,50.0,39.5


In [43]:
torch.cuda.manual_seed(42)
model.eval()
with torch.inference_mode():
    X_test = torch.tensor(test_data.to_numpy(), dtype=torch.float32).to(device)
    test_logits_1 = model(X_test[:365])
    test_logits_2 = model(X_test[365:])
    test_logits = torch.cat([test_logits_1, test_logits_2], dim=0)


In [44]:
test_logits.shape

torch.Size([730, 1])

In [45]:
test_preds = (torch.sigmoid(test_logits))
test_preds = test_preds.cpu().numpy()
test_preds

array([[9.75440860e-01],
       [9.77846503e-01],
       [9.46271181e-01],
       [6.35128692e-02],
       [4.04625870e-02],
       [8.82710338e-01],
       [9.20643091e-01],
       [9.54995096e-01],
       [9.57842469e-01],
       [6.88681483e-01],
       [9.78229225e-01],
       [5.23911566e-02],
       [9.75148320e-01],
       [9.38132763e-01],
       [2.24899635e-01],
       [3.59189021e-03],
       [8.89480710e-01],
       [6.40398026e-01],
       [5.46329319e-02],
       [2.27274769e-03],
       [7.03551546e-02],
       [7.72348419e-02],
       [8.09247196e-01],
       [9.66635644e-01],
       [8.00833821e-01],
       [3.83851081e-01],
       [3.89596750e-03],
       [9.62106884e-01],
       [8.47012937e-01],
       [4.52826828e-01],
       [9.30345476e-01],
       [9.31671917e-01],
       [8.29313099e-01],
       [9.58744287e-01],
       [7.42669702e-01],
       [8.80029857e-01],
       [1.09489210e-01],
       [9.23420250e-01],
       [7.54436314e-01],
       [7.76726425e-01],


In [46]:
answer = pd.read_csv("data/sample_submission.csv")
answer

Unnamed: 0,id,rainfall
0,2190,0
1,2191,0
2,2192,0
3,2193,0
4,2194,0
...,...,...
725,2915,0
726,2916,0
727,2917,0
728,2918,0


In [47]:
answer['rainfall'] = test_preds
answer

Unnamed: 0,id,rainfall
0,2190,0.975441
1,2191,0.977847
2,2192,0.946271
3,2193,0.063513
4,2194,0.040463
...,...,...
725,2915,0.982182
726,2916,0.795519
727,2917,0.965605
728,2918,0.984550


In [49]:
answer.to_csv("data/answer13.csv", index=False)