In [34]:
import os
import numpy as np
import pandas as pd
import torch
import torchvision.transforms as tr
from torch.utils import data
from torch.optim import lr_scheduler

In [35]:
import FocalLoss

In [55]:
class Rain_Loader(data.Dataset):

    def __init__(self, data_path, train):
        self.kind = ['No', 'Yes']
        self.file = pd.read_csv(data_path)
        self.train = train
        
        if self.train:
            self.labels = self.file['Attribute23']
            self.file.drop(columns=['Attribute23'], inplace=True)
        
        self.data = self.file.values
    
    def __len__(self):
        return len(self.data)

    def __getitem__(self,index):

        data = self.data[index]

        if self.train:
            label = self.labels[index]
        else:
            label = 0
        return data, label

        
    

In [72]:
import torch.nn as nn
class MyNet(nn.Module):
    def __init__(self, num_classes=2):
        super(MyNet, self).__init__()
        self.classifier = nn.Sequential(
            nn.Linear(22, 11),
            nn.ReLU(),
            nn.Linear(11, 6),
            nn.ReLU(),
            nn.Linear(6, num_classes),
        )
    def forward(self, x):
        x = x.view(1, len(x)*len(x[0]))
        x = self.classifier(x)
        return x
    
Net = MyNet(num_classes=2)
Net.train()

MyNet(
  (classifier): Sequential(
    (0): Linear(in_features=22, out_features=11, bias=True)
    (1): ReLU()
    (2): Linear(in_features=11, out_features=6, bias=True)
    (3): ReLU()
    (4): Linear(in_features=6, out_features=2, bias=True)
  )
)

In [83]:
import torch.optim as optim
from torch.autograd import Variable

train_data = Rain_Loader(data_path = 'train2.csv',train = True)
train_dataloader = data.DataLoader(dataset=train_data,batch_size = 1, shuffle=True)

test_data = Rain_Loader(data_path = 'test2.csv',train = False)
test_dataloader = data.DataLoader(dataset=test_data,batch_size = 1, shuffle=False)

model = MyNet(num_classes = 2)
model = model.float()
model.train()

criterion1 = FocalLoss.FocalLoss(gamma=5.5, alpha=0.16, size_average=False)
optimizer = optim.Adam(model.parameters(), lr=0.01)
# exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

In [84]:
iter_time = 0
run_loss = 0.0
times = 15

for epoch in range(times):
    print("Epoch [",epoch+1," / " , times , "]", end='')
    # Start Training

    for i,Data in enumerate(train_dataloader,start = 1):

        iter_time+=1
        # Loading Image and Label
        row,labels = Data
        row,labels = Variable(row) ,Variable(labels)
        # Feeding into model
        
        preds = model(row.float())
        # Get loss
        labels = labels.to(dtype=torch.int64)

        loss = criterion1(preds, labels)
        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        run_loss += loss.item()
#     exp_lr_scheduler.step()
    print(", Loss : %.4f" %((run_loss / len(train_dataloader))))
#     print(one, zero)
    run_loss = 0
print('finish')

Epoch [ 1  /  15 ], Loss : 0.0037
Epoch [ 2  /  15 ], Loss : 0.0034
Epoch [ 3  /  15 ], Loss : 0.0038
Epoch [ 4  /  15 ], Loss : 0.0034
Epoch [ 5  /  15 ], Loss : 0.0033
Epoch [ 6  /  15 ], Loss : 0.0033
Epoch [ 7  /  15 ], Loss : 0.0033
Epoch [ 8  /  15 ], Loss : 0.0033
Epoch [ 9  /  15 ], Loss : 0.0032
Epoch [ 10  /  15 ], Loss : 0.0033
Epoch [ 11  /  15 ], Loss : 0.0032
Epoch [ 12  /  15 ], Loss : 0.0032
Epoch [ 13  /  15 ], Loss : 0.0032
Epoch [ 14  /  15 ]

KeyboardInterrupt: 

In [85]:
ans = pd.DataFrame(columns=['id', 'ans'])
for i,Data in enumerate(test_dataloader):
    # Loading Image and Label
    row,labels = Data
    row,labels = Variable(row) ,Variable(labels)
    # Feeding into model
    preds = model(row.float())
    
    if(i % 50 == 0):
        s = nn.Softmax()
        preds = s(preds)
        print(i, preds)
        
    
    _, preds = torch.max(preds.data, 1)
    
    ans.loc[i] = [str(i) + '.0', preds.item()]
ans.to_csv('ans4.csv',index=None, quoting=2)

  # This is added back by InteractiveShellApp.init_path()


0 tensor([[0.5727, 0.4273]], grad_fn=<SoftmaxBackward>)
50 tensor([[0.5727, 0.4273]], grad_fn=<SoftmaxBackward>)
100 tensor([[0.4935, 0.5065]], grad_fn=<SoftmaxBackward>)
150 tensor([[0.5727, 0.4273]], grad_fn=<SoftmaxBackward>)
200 tensor([[0.5314, 0.4686]], grad_fn=<SoftmaxBackward>)
250 tensor([[0.4767, 0.5233]], grad_fn=<SoftmaxBackward>)
300 tensor([[0.5727, 0.4273]], grad_fn=<SoftmaxBackward>)
350 tensor([[0.5727, 0.4273]], grad_fn=<SoftmaxBackward>)
400 tensor([[0.4606, 0.5394]], grad_fn=<SoftmaxBackward>)
450 tensor([[0.5727, 0.4273]], grad_fn=<SoftmaxBackward>)
500 tensor([[0.4453, 0.5547]], grad_fn=<SoftmaxBackward>)
550 tensor([[0.4480, 0.5520]], grad_fn=<SoftmaxBackward>)
600 tensor([[0.5455, 0.4545]], grad_fn=<SoftmaxBackward>)
650 tensor([[0.5727, 0.4273]], grad_fn=<SoftmaxBackward>)
700 tensor([[0.4628, 0.5372]], grad_fn=<SoftmaxBackward>)
750 tensor([[0.4602, 0.5398]], grad_fn=<SoftmaxBackward>)
800 tensor([[0.4313, 0.5687]], grad_fn=<SoftmaxBackward>)


In [86]:
n = pd.read_csv('ans4.csv')
n = len(n[n['ans'] == 1])
print(n)

453


In [21]:
ans.describe()

Unnamed: 0,id,ans
count,824.0,824
unique,824.0,2
top,210.0,1
freq,1.0,450


In [21]:
ans1 = pd.read_csv('ans3.csv')
ans2 = pd.read_csv('anst.csv')


In [22]:
size = len(ans1)
ans1 = ans1['ans']
ans2 = ans2['ans']
n = 0

for i in range(size):
    if ans1[i] == ans2[i]:
        n = n + 1
print(n/size)

0.6868932038834952


In [30]:
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree
import numpy as np

In [31]:
Data = pd.read_csv('train2.csv')
Label = Data['Attribute23']
Data.drop(columns=['Attribute23'], inplace=True)
Data = Data.values

In [32]:
from sklearn.model_selection import train_test_split
xTrain, xTest, yTrain, yTest = train_test_split(Data, Label, test_size=0.33, random_state=42)

In [33]:
clf = DecisionTreeClassifier()
clf = clf.fit(Data, Label)

In [50]:
test = pd.read_csv('test2.csv').values
ans2 = pd.DataFrame({'id': [str(i)+'.0' for i in range(len(test))], 'ans': [int(i) for i in clf.predict(test)]}, columns=['id', 'ans'])


In [51]:
ans2.to_csv('ans2.csv', index=0, quoting=2)

In [116]:
import tensorflow as tf
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
import numpy as np
import pandas as pd

N = 8

Data = pd.read_csv('train2.csv')
# one = Data[Data['Atrribute23'] == 1].values
# zero = np.random.shuffle(Data[Data['Atrribute22'] == 0].values)[0:len(one)]
# one.append(zero)
# Data = np.delete(one, [len(one) - 1 , len(one-2)])

Labels = Data['Attribute23'].values
Data.drop(columns=['Attribute23'], inplace=True)
Data = Data.values

model = Sequential()

model.add(Flatten())  # this converts our 3D feature maps to 1D feature vectors
model.add(Dense(N))
model.add(Activation('relu'))

model.add(Dense(N))
model.add(Activation('relu'))

model.add(Dense(1))
model.add(Activation('sigmoid'))

model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

model.fit(Data, Labels, batch_size=32, epochs=20, validation_split=0.2)

Train on 13677 samples, validate on 3420 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x25dfcf7b898>

In [122]:
# new_model.predict(imgArray)
n = 0
# start = 60
TestData = pd.read_csv('test2.csv')
# TestData = TestData[TestData['Attribute23'] == 1]
# TestData.drop(columns=['Attribute23'], inplace=True)
# validate = TestData['Attribute22'].values
TestData = TestData.values


ans = pd.DataFrame(columns=['id', 'ans'])
for i in range(len(TestData)//N):
    for pred in model.predict(TestData[i*N: i*N+N]):
        a = 1 if pred[0] >= 0.5 else 0
        ans.loc[len(ans)] = [str(len(ans)) + '.0', a]


In [125]:
ans.to_csv('anstf.csv', index=None, quoting=2)