In [1]:
from PIL import Image
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt;
import torch
import torch.nn as nn
import torch.nn.functional as F
from datetime import datetime
import time
import os
import copy

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def load_data(dataset=None, n_train=4001):
    """Split a dataset into sequential training and validation lists.

    Args:
        dataset: Iterable yielding ``(data_in, y)`` pairs. When ``None`` (the
            default) a fresh ``CNNDataloader()`` is built, which is what the
            zero-argument call has always done.
        n_train: Number of leading samples placed in the training list; every
            later sample goes to the validation list. The default of 4001
            reproduces the historical ``i <= 4000`` cut-off.

    Returns:
        tuple: ``(train_loader, val_loader)``, two lists of ``[data_in, y]``
        pairs in their original order.
    """
    if dataset is None:
        dataset = CNNDataloader()

    train_loader = []
    val_loader = []
    for i, (data_in, y) in enumerate(dataset):
        # The split is positional (no shuffling): the first n_train samples
        # train the model, the rest validate it.
        target = train_loader if i < n_train else val_loader
        target.append([data_in, y])

    print(len(train_loader),  len(val_loader))
    return train_loader, val_loader

def z_score(p):
    """Standardize ``p`` to zero mean and unit standard deviation.

    The mean and standard deviation are taken over all elements of ``p``.

    Args:
        p: Array-like of numbers.

    Returns:
        Array with the same shape as ``p``. If ``p`` is constant (standard
        deviation of zero) an all-zero array is returned instead of the
        NaN/inf values that a division by zero would produce.
    """
    centered = p - np.mean(p)
    spread = np.std(p)
    if spread == 0:
        # A constant input carries no information after centering; return
        # zeros rather than propagating NaNs into the model.
        return np.zeros_like(centered)
    return centered / spread

def get_model():
    """Build and return a freshly initialised ``CNN`` instance."""
    return CNN()

class CNN(nn.Module):
    """Full-width convolution + global max-pool + linear head for one 2-D input.

    Each convolution kernel spans the whole input width, so every filter
    produces one activation per vertical position. Those activations are
    ReLU-ed and max-pooled over the vertical axis, the pooled features of all
    kernels are concatenated, and a linear layer maps them to the outputs.

    Args:
        n_filters: Number of output channels of every convolution.
        kernel_heights: Heights of the convolution kernels; one ``Conv2d`` is
            created per entry.
        in_width: Kernel width. ``forward`` expects inputs whose last
            dimension equals this value so the convolved width collapses to 1.
        n_outputs: Size of the final linear layer's output.

    The defaults build exactly the original network: a single
    ``Conv2d(1, 519, (1, 224))`` followed by ``Linear(519, 2)``.
    """

    def __init__(self, n_filters=519, kernel_heights=(1,), in_width=224, n_outputs=2):
        super().__init__()

        kernel_heights = tuple(kernel_heights)
        if not kernel_heights:
            raise ValueError("kernel_heights must contain at least one height")

        self.Conv = nn.ModuleList(
            [nn.Conv2d(1, n_filters, (h, in_width)) for h in kernel_heights]
        )

        # The head consumes the concatenated pooled features of every kernel,
        # so its input size scales with the number of kernels.
        self.fc = nn.Linear(n_filters * len(kernel_heights), n_outputs)

    def relu_pool(self, In, conv):
        """Apply ``conv`` + ReLU to ``In`` and max-pool over the vertical axis.

        ``In`` is a 4-D tensor; the convolved width (dim 3) must be 1 so it
        can be squeezed away. Returns a 2-D tensor with one value per filter.
        """
        In = F.relu(conv(In)).squeeze(3)
        # Pool with a window covering the whole remaining axis: global max.
        In = F.max_pool1d(In, In.size(2)).squeeze(2)
        return In

    def forward(self, out):
        """Run the network on a single 2-D tensor.

        Batch and channel dimensions of size 1 are prepended before the
        convolutions. Returns a tensor of shape ``(1, n_outputs)``.
        """
        out = out.unsqueeze(0)
        out = out.unsqueeze(1)
        out = torch.cat([self.relu_pool(out, conv) for conv in self.Conv], 1)
        out = self.fc(out)
        return out

class CNNDataloader():
    """In-memory dataset built from the module-level ``df`` table.

    For each of the first ``max_samples`` rows of the global DataFrame ``df``
    the archive ``image_dir + row['filename']`` is loaded, the first entry of
    its ``'x'`` array is kept as the input, and the target is
    ``[row['wealthpooled'], 1 or 0]`` depending on the truthiness of
    ``row['urban']``.

    Indexing returns ``(x, y)`` tuples; an out-of-range index raises
    ``IndexError``, which also lets a plain ``for`` loop iterate the dataset.

    Args:
        max_samples: Maximum number of rows to load (default 5200).
    """

    def __init__(self, max_samples=5200):
        X = []
        Y = []
        wealth_total = 0
        for count, (index, row) in enumerate(df.iterrows(), start=1):
            if count > max_samples:
                # Stop instead of walking the rest of the table for nothing.
                break

            # Open the archive in a context manager so its file handle is
            # released immediately rather than whenever it is collected.
            with np.load(image_dir + row['filename']) as archive:
                img = archive['x']
            # Only the first entry along axis 0 is used as model input
            # (presumably the first image band -- TODO confirm).
            X.append(img[0])
            Y.append([row['wealthpooled'], 1 if row['urban'] else 0])

            wealth_total = wealth_total + row['wealthpooled']
            if count % 100 == 0:
                # Progress indicator every 100 loaded samples.
                print(count)

        self.x_train = X
        self.y_train = Y
        # Mean wealth of the loaded samples, printed for reference; NaN when
        # nothing was loaded (avoids a ZeroDivisionError).
        avg = wealth_total / len(X) if X else float('nan')
        print('@@@@@@', avg)

    def __len__(self):
        """Return the number of loaded samples."""
        return len(self.x_train)

    def __getitem__(self, idx):
        """Return the ``(input, target)`` pair stored at position ``idx``."""
        return self.x_train[idx], self.y_train[idx]

class Train():
    """Training driver: owns the data, model, losses, optimizer and schedule.

    The model is optimised with L1 loss (``criterion``); MSE (``criterion2``)
    is computed only for monitoring. Samples are fed one at a time. Early
    stopping and best-model selection are based on the validation L1 loss.
    """

    def __init__(self):

        self.__train_loader, self.__val_loader = load_data()

        self.__epochs = 200
        self.lr = 0.001
        self.__current_epoch = 0
        self.__patience = 20
        self.__early_stop = True
        self.__lowest_val_loss = 10000
        self.__training_losses = []
        self.__training_losses2 = []
        self.__val_losses = []
        self.__val_losses2 = []
        self.__model = get_model()
        self.__model.to(device)
        self.__criterion2 = torch.nn.MSELoss()
        self.__criterion = torch.nn.L1Loss(reduction='mean')

        self.__optimizer = torch.optim.SGD(self.__model.parameters(), lr=self.lr, momentum=0.5, weight_decay=0.002)
        # The learning rate is multiplied by 0.9 after every epoch.
        self.__scheduler = torch.optim.lr_scheduler.ExponentialLR(self.__optimizer, gamma=0.9)

        self.__init_model()

    def __init_model(self):
        """Move the model and both loss modules to the GPU when available."""
        if torch.cuda.is_available():
            self.__model = self.__model.cuda().float()
            self.__criterion = self.__criterion.cuda()
            self.__criterion2 = self.__criterion2.cuda()

    def __prepare(self, data_in, y):
        """Standardize one sample and turn it and its target into tensors.

        Returns ``(data_in, y)`` as float32 tensors on ``device``; ``y`` gets
        a leading dimension of size 1 to match the model output.
        """
        data_in = torch.tensor(z_score(data_in), dtype=torch.float32).to(device)
        y = torch.tensor(y, dtype=torch.float32).to(device).unsqueeze(0)
        return data_in, y

    def run(self):
        """Train with early stopping and return the best model.

        Returns:
            A deep copy of the model taken at the epoch with the lowest
            validation L1 loss. If no epoch ever improved on the initial
            threshold (or no epoch ran), a copy of the current model is
            returned so that callers always receive a usable module.
        """
        count = 0
        best_model = None
        for epoch in range(self.__current_epoch, self.__epochs):
            self.__current_epoch = epoch
            train_loss, train_loss2 = self.__train()
            val_loss, val_loss2 = self.__val()
            self.__training_losses.append(train_loss)
            self.__training_losses2.append(train_loss2)
            self.__val_losses.append(val_loss)
            self.__val_losses2.append(val_loss2)
            print("Epoch:{}, Training loss: {:.6f},Training loss2: {:.6f}, VAL loss: {:.6f}, VAL loss2: {:.6f}".format(epoch, train_loss, train_loss2, val_loss, val_loss2))

            self.__scheduler.step()

            # Written as "strictly better" so that a NaN validation loss is
            # counted as "no improvement" rather than replacing the best
            # model and disabling early stopping.
            if val_loss < self.__lowest_val_loss:
                count = 0
                self.__lowest_val_loss = val_loss
                best_model = copy.deepcopy(self.__model)
            else:
                count += 1

            if self.__early_stop and count >= self.__patience:
                print("Early stopped at: {}".format(epoch))
                break

        if best_model is None:
            best_model = copy.deepcopy(self.__model)
        return best_model

    def __train(self):
        """Run one training epoch; return the mean ``(L1, MSE)`` losses."""
        self.__model.train()
        training_loss = 0
        training_loss2 = 0

        for data_in, y in self.__train_loader:
            data_in, y = self.__prepare(data_in, y)
            self.__optimizer.zero_grad()

            out = self.__model(data_in)

            loss = self.__criterion(out, y)
            # MSE is tracked for reporting only; gradients come from L1.
            loss2 = self.__criterion2(out, y)
            loss.backward()

            training_loss += loss.item()
            training_loss2 += loss2.item()
            self.__optimizer.step()

        training_loss = training_loss / len(self.__train_loader)
        training_loss2 = training_loss2 / len(self.__train_loader)
        return training_loss, training_loss2

    def __val(self):
        """Evaluate on the validation set; return the mean ``(L1, MSE)`` losses."""
        self.__model.eval()
        val_loss = 0
        val_loss2 = 0
        with torch.no_grad():
            for data_in, y in self.__val_loader:
                data_in, y = self.__prepare(data_in, y)

                out = self.__model(data_in)

                val_loss += self.__criterion(out, y).item()
                val_loss2 += self.__criterion2(out, y).item()

        val_loss = val_loss / len(self.__val_loader)
        val_loss2 = val_loss2 / len(self.__val_loader)
        return val_loss, val_loss2

def wealth_labels(score, avg=0, margin=0.4):
    """Turn a predicted wealth score into the two submitted labels.

    Args:
        score: Predicted wealth value.
        avg: Decision threshold separating the two classes.
        margin: Half-width of the band around ``avg`` inside which the
            prediction is considered too uncertain.

    Returns:
        tuple: ``(pred_wo_abstention, pred_with_abstention)``. The first is
        ``-1`` when ``score < avg`` and ``1`` otherwise. The second equals the
        first when the score lies more than ``margin`` away from ``avg`` and
        is ``0`` (abstain) otherwise.
    """
    if score < avg:
        return -1, (-1 if score < avg - margin else 0)
    return 1, (1 if score > avg + margin else 0)


if __name__=='__main__':
    # Dataset locations are derived from the user name embedded in the
    # current working directory, which is expected to look like /home/<user>/...
    path = os.getcwd()
    _uname = path.split('/')[2]
    poverty_dir=f'/home/{_uname}/public/cs255-sp22-a00-public/poverty'
    # image_dir and df are module-level globals read by CNNDataloader.
    image_dir=poverty_dir+'/anon_images/'
    train_table=f'/home/{_uname}/public/Datasets_public/Final_Project_Data/train.csv'
    df=pd.read_csv(train_table,index_col=0)
    df.index=df['filename']

    best_model=Train().run()
    best_model.eval()

    folds=[{'in':'country_test_reduct.csv','out':'results_country.csv'},
            {'in':'random_test_reduct.csv','out':'results.csv'}]

    for fold in folds:
        test_csv=f'/home/{_uname}/public/Datasets_public/Final_Project_Data/{fold["in"]}'
        test=pd.read_csv(test_csv,index_col=0)

        urban_preds=[]
        preds_wo_abstention=[]
        preds_with_abstention=[]

        avg=0
        # Inference only: no gradients are needed.
        with torch.no_grad():
            for filename in test['filename']:
                with np.load(image_dir+filename) as archive:
                    img=archive['x']
                test_data=torch.tensor(z_score(img[0]), dtype=torch.float32).to(device)
                out = best_model(test_data)

                # Output 1 is the urban indicator, output 0 the wealth score.
                urban_preds.append('False' if out[0][1].item() < 0.5 else 'True')
                wo_abstention, with_abstention = wealth_labels(out[0][0].item(), avg, 0.4)
                preds_wo_abstention.append(wo_abstention)
                preds_with_abstention.append(with_abstention)

        out_df=pd.DataFrame()
        out_df['filename'] = test['filename']
        # The 'urban' column holds the model's prediction as a string.
        out_df['urban']=urban_preds
        out_df['pred_wo_abstention']=preds_wo_abstention
        out_df['pred_with_abstention']=preds_with_abstention

        out_df=out_df.reset_index(drop=True)
        outFile=f'/home/{_uname}/bbb/{fold["out"]}'
        # Make sure the destination exists so a missing folder does not throw
        # away the results after training has finished.
        os.makedirs(os.path.dirname(outFile), exist_ok=True)
        out_df.to_csv(outFile)
        print('\n\n'+'-'*60)
        print(outFile)


100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100
2200
2300
2400
2500
2600
2700
2800
2900
3000
3100
3200
3300
3400
3500
3600
3700
3800
3900
4000
4100
4200
4300
4400
4500
4600
4700
4800
4900
5000
5100
5200
@@@@@@ 0.012354564807575606
4001 1199


  return torch.max_pool1d(input, kernel_size, stride, padding, dilation, ceil_mode)


Epoch:0, Training loss: 0.595356,Training loss2: 0.597825, VAL loss: 0.545570, VAL loss2: 0.480220
Epoch:1, Training loss: 0.505196,Training loss2: 0.448843, VAL loss: 0.564370, VAL loss2: 0.505945
Epoch:2, Training loss: 0.468822,Training loss2: 0.391199, VAL loss: 0.489678, VAL loss2: 0.417228
Epoch:3, Training loss: 0.425071,Training loss2: 0.329100, VAL loss: 0.579949, VAL loss2: 0.555060
Epoch:4, Training loss: 0.401862,Training loss2: 0.298953, VAL loss: 0.539585, VAL loss2: 0.496445
Epoch:5, Training loss: 0.365318,Training loss2: 0.249494, VAL loss: 0.495902, VAL loss2: 0.418437
Epoch:6, Training loss: 0.341829,Training loss2: 0.223054, VAL loss: 0.663135, VAL loss2: 0.737944
Epoch:7, Training loss: 0.321282,Training loss2: 0.201413, VAL loss: 0.555770, VAL loss2: 0.530450
Epoch:8, Training loss: 0.297531,Training loss2: 0.175692, VAL loss: 0.533468, VAL loss2: 0.496278
Epoch:9, Training loss: 0.279643,Training loss2: 0.155861, VAL loss: 0.503480, VAL loss2: 0.404581
Epoch:10, 