In [1]:
#deal with tensors
import torch   
import sys
#handling text data
from torchtext import data    
import pandas as pd
from torchtext.vocab import Vectors
from collections import Counter

In [2]:
import os
import json

In [3]:
from time import time

In [4]:
#Reproducing same results
SEED = 2020

#Torch
torch.manual_seed(SEED)

#Cuda algorithms
torch.backends.cudnn.deterministic = True  

In [None]:
#Load data

In [5]:
#Label 1 for Tunisian sentences
train = pd.read_csv("train/MADAR.corpus6Tunis.txt",header=None,sep="\n")
train["label"] = 1
train.head()

Unnamed: 0,0,label
0,موجود في اخر الكولوار. و نجيبهولك توا. كان تست...,1
1,تعملوا تبديلات؟,1
2,نحبوا طاولة قريبة مالشباك.,1
3,اهوكا غادي، بالضبط قدام البيرو متاع الارشادات ...,1
4,عمري ما سمعت بالادريسة هاذي هوني.,1


In [6]:
# Label 0 for the non-Tunisian corpora (Cairo, MSA and Rabat files).
# The three frames are combined with a single pd.concat call instead of chained
# DataFrame.append calls: append copies the accumulated frame on every call and
# is deprecated in recent pandas releases. Row indices are kept as read.
train_0 = pd.concat([
    pd.read_csv("train/MADAR.corpus6Cairo.txt",header=None,sep="\n"),
    pd.read_csv("train/MADAR.corpus6MSA.txt",header=None,error_bad_lines=False,sep="\n"),
    pd.read_csv("train/MADAR.corpus6Rabat.txt",header=None,error_bad_lines=False,sep="\n"),
])
train_0["label"] = 0
print(train_0.shape)
train_0.head()

(36000, 2)


Unnamed: 0,0,label
0,هو في اخر القاعة . أنا حأجيبلك شويه دلوقتي . ل...,0
1,بتعمل تبديلات؟,0
2,عايزين ترابيزة جنب الشباك .,0
3,ده قدامك هناك، يادوبك قدام مكتب استعلامات السي...,0
4,عمري ما سمعت عن العنوان ده هنا .,0


In [7]:
# Merge the two labelled frames. pd.concat replaces the deprecated
# DataFrame.append; the original row indices are kept, so index values repeat
# in the merged frame until it is re-indexed.
train = pd.concat([train, train_0])
train.head(2)

Unnamed: 0,0,label
0,موجود في اخر الكولوار. و نجيبهولك توا. كان تست...,1
1,تعملوا تبديلات؟,1


In [8]:
print(train.shape)

(48000, 2)


In [9]:
train.rename({0:"text"},inplace=True,axis=1)
train.head()

Unnamed: 0,text,label
0,موجود في اخر الكولوار. و نجيبهولك توا. كان تست...,1
1,تعملوا تبديلات؟,1
2,نحبوا طاولة قريبة مالشباك.,1
3,اهوكا غادي، بالضبط قدام البيرو متاع الارشادات ...,1
4,عمري ما سمعت بالادريسة هاذي هوني.,1


In [10]:
#shuffling data
# random_state=SEED makes the shuffle reproducible; without it the row order
# changes on every run even though the torch seed is fixed.
train = train.sample(frac=1, random_state=SEED).reset_index(drop=True)
train.head()

Unnamed: 0,text,label
0,نجم نبعث كان مايل واحد ال دولار؟,1
1,خرج بسرعة في نص الشارع اللي مفهوش مكان إنه يعدي.,0
2,واحد ناقص . مجموع ما اشتريت خمسة .,0
3,عندك منقالة سلسلة؟,1
4,فصيلة الدم ديالي هي إي.,0


In [None]:
#some preprocessing

In [11]:
def remove_puncts(text):
    """Delete a fixed set of punctuation marks and stray symbols from ``text``.

    Every occurrence of each listed symbol is replaced by the empty string;
    characters that are not in the list are left untouched.
    """
    unwanted_symbols = (
        '«','ï' ,'«', '"','-', '|', "'", '$', '&', '/', '[', ']', '>', '%', '=', '#', '*', '+', '\\', '•',  '~', '@', '£',
        '·', '_', '{', '}', '©', '^', '®', '`',  '<', '→', '°', '€', '™', '›',  '♥', '←', '×', '§', '″', '′', 'Â', '█', '½', '…',
        '“', '★', '”', '–', '●', 'â', '►', '−', '¢', '²', '¬', '░', '¶', '↑', '±', '¿', '▾', '═', '¦', '║', '―', '¥', '▓', '—', '‹', '─',
        '▒', '：', '¼', '⊕', '▼', '▪', '†', '■', '’', '▀', '¨', '▄', '♫', '☆', '¯', '♦', '¤', '▲', '¸', '¾', 'Ã', '⋅', '‘', '∞',
        '∙', '）', '↓', '、', '│', '（', '»', '，', '♪', '╩', '╚', '³', '・', '╦', '╣', '╔', '╗', '▬', '❤', 'ï', 'Ø', '¹', '≤', '‡', '√',
    )
    cleaned = text
    for symbol in unwanted_symbols:
        # Nothing to do when the symbol does not occur in the current text.
        if symbol not in cleaned:
            continue
        cleaned = cleaned.replace(symbol, "")
    return cleaned

train["text"] = train["text"].apply(remove_puncts)


In [12]:
train["text"] = train["text"].apply(str)
train.head()

Unnamed: 0,text,label
0,نجم نبعث كان مايل واحد ال دولار؟,1
1,خرج بسرعة في نص الشارع اللي مفهوش مكان إنه يعدي.,0
2,واحد ناقص . مجموع ما اشتريت خمسة .,0
3,عندك منقالة سلسلة؟,1
4,فصيلة الدم ديالي هي إي.,0


In [None]:
#Preparing the data as it is expected to ELMo
#ELMo:Training a biLM on a new corpus
#https://github.com/allenai/bilm-tf

In [13]:
# Module-level accumulator: one list of tokens per sentence passed to to_corpus.
corpus = []
def to_corpus(row):
    """Split ``str(row)`` on whitespace, append the token list to ``corpus`` and return it."""
    corpus.append(str(row).split())
    # Return the very list object that was just stored.
    return corpus[-1]

train["samples"] = train["text"].apply(to_corpus)

In [14]:
texts = " ".join(train['text'].tolist())
# Split on any run of whitespace rather than on single spaces: a sentence with
# consecutive, leading or trailing spaces (for example where a symbol was
# removed between two spaces) would otherwise produce empty-string tokens that
# end up in the vocabulary. This also matches the str.split() used by to_corpus.
words = texts.split()

In [15]:
dictionary = Counter(words)
print("Size of Vocab",len(dictionary))
sorted_vocab = ["<S>","</S>","<UNK>"]
sorted_vocab.extend([pair[0] for pair in dictionary.most_common()])

Size of Vocab 49052


In [16]:
# Write one vocabulary entry per line. The file is opened with an explicit UTF-8
# encoding because the tokens are Arabic text and the platform default encoding
# is not guaranteed to represent them; the with-block closes the file even if a
# write fails.
with open("train_vocab.txt", "w", encoding="utf-8") as fp:
    for token in sorted_vocab:
        fp.write(token + "\n")



In [17]:
# random_state=SEED keeps this shuffle reproducible between runs.
train_d = train.sample(frac=1, random_state=SEED).reset_index(drop=True)
train_d.head()

Unnamed: 0,text,label,samples
0,شكوناهوا الفنان؟,0,"[شكوناهوا, الفنان؟]"
1,ايوه، انا حجزت تبع وكاله سياحة اي بي سي .,0,"[ايوه،, انا, حجزت, تبع, وكاله, سياحة, اي, بي, ..."
2,أي، موجود. هوني يجي تران. نهارك زين.,1,"[أي،, موجود., هوني, يجي, تران., نهارك, زين.]"
3,كم مرة تقيم حفلاً موسيقياً ؟,0,"[كم, مرة, تقيم, حفلاً, موسيقياً, ؟]"
4,حسنا، غادي نعاود الاتصال بيك من بعد. شكرا.,0,"[حسنا،, غادي, نعاود, الاتصال, بيك, من, بعد., ش..."


In [18]:
train_d["count"] = train_d["samples"].apply(len)

In [19]:
train_d.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
label,48000.0,0.25,0.433017,0.0,0.0,0.0,0.25,1.0
count,48000.0,6.414417,3.691133,1.0,4.0,6.0,8.0,59.0


In [20]:
train_d.shape

(48000, 4)

In [21]:
train_d = train_d[ train_d["count"] <= 60 ]
train_d.shape

(48000, 4)

In [22]:
train_d["count"].value_counts()

5     7275
4     7168
6     6302
3     5774
7     5073
8     3772
9     2669
2     2560
10    1953
11    1347
12     991
13     688
14     495
15     386
1      331
16     281
17     190
18     151
19      98
20      93
21      75
22      61
23      54
24      34
26      30
25      19
27      19
28      18
29      18
30      13
33       8
31       8
36       7
35       6
32       5
39       4
34       4
38       3
42       3
41       3
48       2
52       2
44       1
46       1
47       1
40       1
49       1
50       1
59       1
Name: count, dtype: int64

In [23]:
import torch.nn as nn
from allennlp.modules.elmo import Elmo, batch_to_ids


class Classifier(nn.Module):
    """Binary classifier: ELMo -> Linear -> ReLU -> Dropout -> Linear -> Sigmoid.

    Only the ELMo representation of the first token of every sentence is fed to
    the dense layers.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, 
                 dropout,options_file,weight_file):
        """Build the layers.

        Args:
            vocab_size: accepted for interface compatibility; not used here.
            embedding_dim: size of the ELMo vectors, i.e. input size of ``fc_1``.
            hidden_dim: output size of ``fc_1`` and input size of ``fc``.
            output_dim: output size of ``fc``.
            dropout: drop probability of the dropout layer after the ReLU.
            options_file: path handed to ``Elmo`` as its options file.
            weight_file: path handed to ``Elmo`` as its weight file.
        """
        super().__init__()

        # ELMo with a single output representation. requires_grad=False is
        # passed here; set it to True to fine-tune the ELMo parameters.
        self.elmo = Elmo(options_file, weight_file, 1, requires_grad=False, dropout=0.3)

        # Dense head (registered in the same order: fc_1, then fc).
        self.fc_1 = nn.Linear(embedding_dim, hidden_dim)
        self.fc = nn.Linear(hidden_dim, output_dim)

        # Regularisation between the two dense layers.
        self.droplayer = nn.Dropout(p=dropout)

        # Activations: ReLU after fc_1, Sigmoid on the final output.
        self.act_1 = nn.ReLU()
        self.act = nn.Sigmoid()

    def forward(self, sentences):
        """Return the sigmoid output of the dense head for a batch of sentences.

        ``sentences`` is passed straight to the ELMo module; the first entry of
        its ``"elmo_representations"`` list is indexed as [batch, token, emb dim].
        """
        elmo_output = self.elmo(sentences)
        token_embeddings = elmo_output["elmo_representations"][0]

        # Keep the vector of the first token only -> [batch, emb dim].
        first_token = token_embeddings[:, 0, :]
        # Shape-preserving for a 3-D representation; kept from the original code.
        first_token = first_token.view(first_token.shape[0], first_token.shape[-1])

        hidden = self.droplayer(self.act_1(self.fc_1(first_token)))

        # Final activation on top of the last linear layer.
        return self.act(self.fc(hidden))

In [24]:
#No. of trainable parameters
def count_parameters(model):
    """Return the total element count of the parameters of ``model`` that require gradients."""
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

In [25]:
import torch.optim as optim
from torch import LongTensor

#define metric
def binary_accuracy(preds, y):
    """Return the fraction of rounded predictions that equal the labels.

    Args:
        preds: tensor of predicted values; each one is rounded with torch.round.
        y: tensor of labels, compared element-wise with the rounded predictions.

    Returns:
        A 0-d float tensor: (number of matches) / (number of compared elements).
    """
    #round predictions to the closest integer
    rounded_preds = torch.round(preds)

    correct = (rounded_preds == y).float()
    # numel() rather than len(): len() raises TypeError on a 0-d tensor (a single
    # prediction after a bare squeeze()) and counts only the first dimension
    # when the comparison is multi-dimensional.
    acc = correct.sum() / correct.numel()
    return acc
    


In [26]:
def train(model, iterator, optimizer, criterion):
    """Run one training epoch and return the mean per-batch loss and accuracy.

    Args:
        model: module called on the inputs of each batch; put in training mode.
        iterator: sized iterable of batches where batch[0] holds the inputs and
            batch[1] the labels.
        optimizer: optimizer whose gradients are reset and stepped once per batch.
        criterion: loss called as criterion(predictions, labels).

    Returns:
        Tuple (mean loss, mean accuracy), each averaged over the number of batches.
    """
    t0 = time()
    print("\ntraining process...:")

    #initialize every epoch
    epoch_loss = 0
    epoch_acc = 0
    #set the model in training phase
    model.train()
    iterator_length = len(iterator)

    # Send each batch to the device the model's parameters live on instead of
    # relying on a module-level `device` variable having been defined beforehand.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    for step, batch in enumerate(iterator):

        if step % 50 == 0 and not step == 0:
            # Elapsed time since the start of the epoch, rendered by format_time.
            elapsed = format_time(time() - t0)
            # Report progress.
            print(' Batch {:>5,} of {:>5,}. Elapsed: {:}.'.format(step, iterator_length, elapsed))

        # [0]: input ids
        # [1]: labels
        b_input_ids = batch[0].to(model_device)
        b_labels = batch[1].to(model_device)

        #resets the gradients after every batch
        optimizer.zero_grad()

        # Flatten to 1-D. reshape(-1) keeps a one-sample batch as shape [1]; a bare
        # squeeze() would collapse it to a 0-d tensor that no longer matches the labels.
        predictions = model(b_input_ids).reshape(-1)
        b_labels = b_labels.type_as(predictions)

        #compute the loss
        loss = criterion(predictions, b_labels)

        #compute the binary accuracy
        acc = binary_accuracy(predictions, b_labels)

        #backpropagate the loss and compute the gradients
        loss.backward()

        #update the weights
        optimizer.step()

        #loss and accuracy
        epoch_loss += loss.item()
        epoch_acc += acc.item()

    print("\ntraining_epoch_loss: ",epoch_loss/iterator_length,"\ntraining_epoch_acc: ",epoch_acc/iterator_length)

    return epoch_loss / iterator_length , epoch_acc / iterator_length

In [27]:
def evaluate(model, iterator, criterion):
    """Evaluate ``model`` on ``iterator`` without gradient tracking.

    Args:
        model: module called on the inputs of each batch; put in eval mode.
        iterator: sized iterable of batches where batch[0] holds the inputs and
            batch[1] the labels.
        criterion: loss called as criterion(predictions, labels).

    Returns:
        Tuple (mean loss, mean accuracy), each averaged over the number of batches.
    """
    t0 = time()
    print("\nvalidation process...:")
    #initialize every epoch
    epoch_loss = 0
    epoch_acc = 0

    #deactivating dropout layers
    model.eval()
    iterator_length = len(iterator)

    # Send each batch to the device the model's parameters live on instead of
    # relying on a module-level `device` variable having been defined beforehand.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    #deactivates autograd
    with torch.no_grad():

        for step, batch in enumerate(iterator):

            if step % 40 == 0 and not step == 0:
                # Elapsed time since the start of the pass, rendered by format_time.
                elapsed = format_time(time() - t0)
                # Report progress.
                print(' Batch {:>5,} of {:>5,}. Elapsed: {:}.'.format(step, iterator_length, elapsed))

            # [0]: input ids
            # [1]: labels
            b_input_ids = batch[0].to(model_device)
            b_labels = batch[1].to(model_device)

            # Flatten to 1-D. reshape(-1) keeps a one-sample batch as shape [1]; a bare
            # squeeze() would collapse it to a 0-d tensor that no longer matches the labels.
            predictions = model(b_input_ids).reshape(-1)
            b_labels = b_labels.type_as(predictions)

            #compute loss and accuracy
            loss = criterion(predictions, b_labels)
            acc = binary_accuracy(predictions, b_labels)

            #keep track of loss and accuracy
            epoch_loss += loss.item()
            epoch_acc += acc.item()
    print("\nvalidation_epoch_loss: ",epoch_loss/iterator_length,"\nValidation_epoch_acc: ",epoch_acc/iterator_length)
    return epoch_loss / iterator_length, epoch_acc / iterator_length

In [28]:
def predict(model,iterator):
    """Run ``model`` over ``iterator`` and return all predictions with their labels.

    Args:
        model: module called on the inputs of each batch; put in eval mode.
        iterator: iterable of batches where batch[0] holds the inputs and
            batch[1] the labels.

    Returns:
        Tuple (preds, labels) of 1-D tensors in iteration order; labels are cast
        to the dtype of the predictions. Both are empty when ``iterator`` yields
        no batch.
    """
    model.eval()

    # Send each batch to the device the model's parameters live on instead of
    # relying on a module-level `device` variable having been defined beforehand.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    batch_preds = []
    batch_labels = []
    with torch.no_grad():

        for batch in iterator:
            # [0]: input ids
            # [1]: labels
            b_input_ids = batch[0].to(model_device)
            b_labels = batch[1].to(model_device)

            # Flatten to 1-D. reshape(-1) keeps a one-sample batch as shape [1]; a bare
            # squeeze() would give a 0-d tensor, which torch.cat cannot concatenate.
            predictions = model(b_input_ids).reshape(-1)

            batch_preds.append(predictions)
            batch_labels.append(b_labels.type_as(predictions))

    if not batch_preds:
        return torch.FloatTensor().to(model_device), torch.FloatTensor().to(model_device)

    # Concatenate once at the end instead of re-copying the running result per batch.
    return torch.cat(batch_preds), torch.cat(batch_labels)
        

        

In [29]:
sentences = train_d.samples.values.tolist()
labels = train_d.label.values

#sentences

In [30]:
#help(batch_to_ids)

In [31]:
#input_ids of shape [len(batch), max sentence length, max word length]
input_ids = batch_to_ids(sentences)

In [32]:
input_ids.shape

torch.Size([48000, 59, 50])

In [33]:
from sklearn.model_selection import train_test_split  # Hold out 13% of the samples for validation (test_size=0.13); the remaining 87% are used for training.
train_inputs, validation_inputs, train_labels, validation_labels = train_test_split(input_ids, labels, random_state=2, test_size=0.13)

In [34]:
train_inputs.shape

torch.Size([41760, 59, 50])

In [None]:
# The split inputs are already tensors (see the torch.Size shown for
# train_inputs), and torch.tensor() on an existing tensor copies it and emits a
# UserWarning. torch.as_tensor() passes tensors through unchanged and converts
# the NumPy label arrays.
train_inputs = torch.as_tensor(train_inputs)
validation_inputs = torch.as_tensor(validation_inputs)

train_labels = torch.as_tensor(train_labels)
validation_labels = torch.as_tensor(validation_labels)

  """Entry point for launching an IPython kernel.


In [None]:
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler

batch_size = 32 


#Create the DataLoader for our training set.
train_data = TensorDataset(train_inputs, train_labels)
train_sampler = RandomSampler(train_data)
train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=batch_size)


# Create the DataLoader for our validation set.
validation_data = TensorDataset(validation_inputs, validation_labels)
validation_sampler = SequentialSampler(validation_data)
validation_dataloader = DataLoader(validation_data, sampler=validation_sampler, batch_size=batch_size)

from shutil import copyfile

# Directory holding the pre-trained ELMo files. It can be overridden through the
# PRETRAINED_ELMO_DIR environment variable so the notebook is not tied to one
# machine; the default is the path the notebook was originally written against.
PRETRAINED_ELMO_DIR = os.environ.get(
    "PRETRAINED_ELMO_DIR",
    "/home/mahmoud/Desktop/PFE_Tunisian_Dialect/data/new_corpus/train/pre-trained_elmo/swb",
)

# exist_ok=True makes the separate os.path.exists check unnecessary.
os.makedirs("model", exist_ok=True)

# (path relative to PRETRAINED_ELMO_DIR, destination inside model/)
for src_rel, dst in [
    ("vocab.txt", "model/vocab.txt"),
    (os.path.join("checkpoint", "options.json"), "model/options.json"),
    ("swb_weights.hdf5", "model/swb_weights.hdf5"),
]:
    copyfile(os.path.join(PRETRAINED_ELMO_DIR, src_rel), dst)

#set n_characters to 262 in options.json
# with-blocks close the file handles even if parsing or writing raises.
with open("model/options.json", "r") as a_file:
    json_object = json.load(a_file)

#print(json_object)
json_object["char_cnn"]["n_characters"] = 262

with open("model/options.json", "w") as a_file:
    json.dump(json_object, a_file)

In [None]:
vocab_file = "train_vocab.txt"

#parameters of the model
options_file = "model/options.json"
weight_file = "model/swb_weights.hdf5"

vocab_size = len(sorted_vocab)
embedding_dim = 128    #Size of ELMO pretrained embeddings
hidden_dim = 32
output_dim = 1
#n_layers = 1 
dropout = 0.6




In [None]:
model = Classifier(vocab_size,embedding_dim,hidden_dim,output_dim,dropout,
              options_file,weight_file)

# Use the GPU when one is available and fall back to the CPU otherwise; an
# unconditional model.cuda() raises on a machine without CUDA.
model.to(torch.device('cuda' if torch.cuda.is_available() else 'cpu'))

In [None]:
print("model parameters:\n", count_parameters(model))
N_trainable_params = count_parameters(model)

In [None]:
from IPython.display import clear_output
from torch import optim
#check whether cuda is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  

optimizer = optim.Adam(model.parameters())
criterion = nn.BCELoss()

#push to cuda if available
model = model.to(device)

criterion = criterion.to(device)

In [None]:
import random
import numpy as np
seed_val = 2
random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)

In [None]:
#import time
import datetime
def format_time(elapsed):
    """Render a duration in seconds as the string form of a ``datetime.timedelta``.

    The value is rounded to the nearest whole second first, so the result looks
    like ``0:01:05`` (hours:minutes:seconds).
    """
    whole_seconds = int(round(elapsed))
    duration = datetime.timedelta(seconds=whole_seconds)
    return str(duration)

In [None]:
epochs = 5
t1 = time()

# Per-epoch history of the values returned by train() and evaluate().
loss_values_train, acc_values_train = [], []
loss_values_eval, acc_values_eval = [], []

for epoch in range(epochs):
    # One pass over the training data, then one over the validation data.
    loss_train, acc_train = train(model, train_dataloader, optimizer, criterion)
    loss_eval, acc_eval = evaluate(model, validation_dataloader, criterion)

    loss_values_train.append(loss_train)
    acc_values_train.append(acc_train)

    loss_values_eval.append(loss_eval)
    acc_values_eval.append(acc_eval)

# Total wall-clock time of all epochs.
elapsed = format_time(time() - t1)
print("elapsed time: ", elapsed)

In [None]:
loss_values_train

In [None]:
loss_values_train = np.array(loss_values_train)
loss_values_eval = np.array(loss_values_eval)

e = range(1,epochs+1)

In [None]:
import matplotlib.pyplot as plt
plt.figure(figsize=(10,7))
plt.plot(e, loss_values_train, 'g', label='Training loss')
plt.plot(e, loss_values_eval, 'b', label='validation loss')
plt.title('Training and Validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.savefig('loss_curve.png')
plt.show()

In [None]:
import plotly.express as px
f = pd.DataFrame(loss_values_eval)
f.columns=['Loss']
fig = px.line(f, x=f.index, y=f.Loss)
fig.update_layout(title='Evaluation loss of the Model',xaxis_title='Epoch',yaxis_title='Loss')
fig.show()

In [None]:
f = pd.DataFrame(acc_values_eval)
f.columns=['Accuracy']
fig = px.line(f, x=f.index, y=f.Accuracy)
fig.update_layout(title='Evaluation accuracy of the Model',xaxis_title='Epoch',yaxis_title='Accuracy')
fig.show()

In [None]:
#####################################################################

#SAVE the model weights
# Only the state_dict is written to `path`, because the test section reloads
# this file with model.load_state_dict(torch.load(path)). Saving the whole model
# object to the same file afterwards would overwrite the state_dict and make
# that reload fail, so the full pickled model goes to its own file.
path = "model_v3_weights.pt"
torch.save(model.state_dict(), path)
torch.save(model, "model_v3_full.pt")
#LOAD the model weights
#model = TheModelClass(*args, **kwargs)
#model.load_state_dict(torch.load(PATH))
#model.eval()

In [None]:
#####################################################################

In [None]:
#TEST

path = "model_v3_weights.pt"
# map_location=device lets a checkpoint written on a GPU load on a CPU-only machine.
checkpoint = torch.load(path, map_location=device)
# The checkpoint may be a plain state_dict or a whole pickled module; accept both.
state_dict = checkpoint.state_dict() if isinstance(checkpoint, nn.Module) else checkpoint
model.load_state_dict(state_dict)
model.eval()

In [None]:
test = pd.read_csv("test/QADI_test.txt",header=None,sep='\t')
test.rename({0:"text",1:"label"},inplace=True,axis=1)
test["label"] = test["label"].apply(str)
test.head()

In [None]:
test["label"].value_counts()

In [None]:
#test.loc[test.label == 'TN','label'] = 1
#test.loc[test.label != 'TN','label'] = 0
def labeling(label):
    """Map a dialect tag to a binary label.

    Returns 1 when ``str(label)`` equals 'TN' after stripping surrounding
    whitespace, and 0 for anything else.
    """
    # The original trailing bare `return` was unreachable and has been dropped.
    return 1 if str(label).strip() == 'TN' else 0
test["label"] = test["label"].apply(labeling)

test["label"].value_counts()

In [None]:
test_0 = pd.read_csv("test/MADAR.corpus26_totestTunis.txt",header=None,sep="\n")
test_0.rename({0:"text"},inplace=True,axis=1)
test_0["label"] = 1

# pd.concat replaces the deprecated DataFrame.append; row indices are kept as-is.
test = pd.concat([test, test_0])
test.head(2)

In [None]:
test["text"] = test["text"].apply(remove_puncts)

In [None]:
def remove_intruder(text):
    """Delete the placeholder markers USER, EMOJI, NUM, URL and NEWLINE from ``text``.

    The markers are removed one after another in that order, and every
    occurrence is dropped wherever it appears, including inside longer words.
    """
    cleaned = text
    for marker in ("USER", "EMOJI", "NUM", "URL", "NEWLINE"):
        # Nothing to do when the marker does not occur in the current text.
        if marker not in cleaned:
            continue
        cleaned = cleaned.replace(marker, "")
    return cleaned

test["text"] = test["text"].apply(remove_intruder)
test["label"].value_counts()

In [None]:
def tokenize(row):
    """Split ``row`` on runs of whitespace and return the resulting list of tokens."""
    tokens = row.split()
    return tokens

test["samples"] = test["text"].apply(tokenize)
test["label"] = test["label"].apply(int)


In [None]:
# random_state=SEED keeps the shuffled test order reproducible between runs.
test = test.sample(frac=1, random_state=SEED).reset_index(drop=True)
test.head()

In [None]:
sentences = test.samples.values.tolist()
labels = test.label.values

In [None]:
input_ids = batch_to_ids(sentences)

In [None]:
input_ids.shape

In [None]:
# torch.as_tensor passes an existing tensor through unchanged, avoiding the copy
# and UserWarning that torch.tensor() triggers on a tensor, and converts the
# NumPy label array.
test_inputs = torch.as_tensor(input_ids)
test_labels = torch.as_tensor(labels)

# Create the DataLoader for our test set (sequential order, no shuffling).
test_data = TensorDataset(test_inputs, test_labels)
test_sampler = SequentialSampler(test_data)
test_dataloader = DataLoader(test_data, sampler=test_sampler, batch_size=batch_size)

In [None]:
predictions, labels = predict(model,test_dataloader)

In [None]:
#Converting prababilities to int
predictions = (predictions>0.5).int()

In [None]:
predictions = predictions.cpu()
labels = labels.cpu()

In [None]:
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
#print("accuracy: ",accuracy_score(labels,predictions))
target_names = ['other', 'TN']
print(classification_report(labels, predictions, target_names=target_names))

In [None]:
from sklearn.metrics import confusion_matrix
confusion_matrix(labels, predictions)

In [None]:
accuracy = accuracy_score(labels,predictions)
print(accuracy)

In [None]:
import seaborn as sn
import pandas as pd
import matplotlib.pyplot as plt

array = confusion_matrix(labels, predictions)
import numpy as np


def plot_confusion_matrix(cm,
                          target_names,
                          title='Confusion matrix',
                          cmap=None,
                          normalize=False):
    """
    Given a sklearn confusion matrix (cm), draw it as an annotated heat map.

    The figure is shown with plt.show(); nothing is returned.

    Arguments
    ---------
    cm:           confusion matrix from sklearn.metrics.confusion_matrix

    target_names: given classification classes such as [0, 1, 2]
                  the class names, for example: ['high', 'medium', 'low'];
                  pass None to leave the default tick labels

    title:        the text to display at the top of the matrix

    cmap:         the gradient of the values displayed from matplotlib.pyplot.cm
                  see http://matplotlib.org/examples/color/colormaps_reference.html
                  plt.get_cmap('jet') or plt.cm.Blues
                  (defaults to 'Blues' when None)

    normalize:    If False, plot the raw numbers
                  If True, plot the proportions (each row divided by its sum)

    Usage
    -----
    plot_confusion_matrix(cm           = cm,                  # confusion matrix created by
                                                              # sklearn.metrics.confusion_matrix
                          normalize    = True,                # show proportions
                          target_names = y_labels_vals,       # list of names of the classes
                          title        = best_estimator_name) # title of graph

    Citation
    --------
    http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html

    """
    import matplotlib.pyplot as plt
    import numpy as np
    import itertools

    # Overall accuracy = diagonal sum / total count; shown in the x-axis label.
    accuracy = np.trace(cm) / np.sum(cm).astype('float')
    misclass = 1 - accuracy

    if cmap is None:
        cmap = plt.get_cmap('Blues')

    plt.figure(figsize=(20, 20))
    # The image is drawn from cm as passed in, i.e. before the optional
    # normalisation below; only the cell annotations use the normalised values.
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    if target_names is not None:
        tick_marks = np.arange(len(target_names))
        plt.xticks(tick_marks, target_names, rotation=45)
        plt.yticks(tick_marks, target_names)

    if normalize:
        # Row-wise normalisation: every row is divided by its own sum.
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]


    # Cells above this threshold get white text, the others black text.
    thresh = cm.max() / 1.5 if normalize else cm.max() / 2
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        if normalize:
            plt.text(j, i, "{:0.4f}".format(cm[i, j]),
                     horizontalalignment="center",
                     color="white" if cm[i, j] > thresh else "black")
        else:
            plt.text(j, i, "{:,}".format(cm[i, j]),
                     horizontalalignment="center",
                     color="white" if cm[i, j] > thresh else "black")


    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label\naccuracy={:0.4f}; misclass={:0.4f}'.format(accuracy, misclass))
    plt.show()

In [None]:
plot_confusion_matrix(array,["other","TD"])

# NOTE(review): the description says the ELMo weights are trainable, while the
# Classifier builds Elmo with requires_grad=False — confirm which is intended.
d = {"Architecture/description": "ELMo->Linear->ReLU->Linear->Sigmoid, with ELMo weights trainable",
    "ELMo_emb_size":embedding_dim,
     "dropout": dropout,
     "BATCH_SIZE": batch_size,
     "epochs": epochs,
     "accuracy": accuracy,
     "Trainable_params": N_trainable_params,
     "Time": elapsed
    }

# Start from an empty table when results.csv does not exist yet instead of
# failing on the first run.
r = pd.read_csv("results.csv") if os.path.exists("results.csv") else pd.DataFrame()

# pd.concat with a one-row frame replaces the deprecated DataFrame.append(dict).
r = pd.concat([r, pd.DataFrame([d])], ignore_index=True)
r.head()

In [147]:
r.to_csv("results.csv",index=False)