# OVANA

Run this notebook in Google Colab and mount a Google Drive that contains the files stored in `../dataset`.

Switch runtime to GPU

In [None]:
import csv
import pandas as pd
import numpy as np
from flair.embeddings import WordEmbeddings, FlairEmbeddings, TransformerWordEmbeddings, StackedEmbeddings, FastTextEmbeddings, CharacterEmbeddings, DocumentLSTMEmbeddings
from flair.models import SequenceTagger, TextClassifier
from flair.trainers import ModelTrainer
from pathlib import Path
from flair.data import Corpus
from flair.datasets import ColumnCorpus, CSVClassificationCorpus
from flair.data import Sentence

Start this notebook from the following path:

In [None]:
# Base directory on the mounted Google Drive; the cells below build their
# "dataset/" and "matcher/" locations by appending to this string.
path = "/content/drive/My Drive/Colab Notebooks/OVANA/"

In [None]:
# Mount Google Drive at /content/drive so that `path` becomes accessible.
from google.colab import drive
drive.mount('/content/drive')

# [First Use Only] Splitting the dataset

Save the tagged CVEs into path/dataset

In [None]:
# Suffixes of the dataset/tagged_descriptions_<suffix>.json files that are concatenated below.
years = ["2013", "2014", "2015", "2016", "2017", "2018", "2019", "additional_1", "additional_2"] 

In [None]:
# Output file for the concatenated corpus. The handle is filled by the next cell
# and closed by the cell after it, so all three cells must be run in order.
new_file = open("tagged_all.csv", "w")

In [None]:
# Concatenate the per-suffix files into `new_file`, writing an empty line
# whenever the CVE id (third space-separated column) changes.
for suffix in years:
    source_name = path + "dataset/tagged_descriptions_" + suffix + ".json"
    with open(source_name, "r") as source:
        for row_no, row in enumerate(source):
            cve_id = row.split(" ")[2].replace("\n", "")
            if row_no != 0 and cve_id != last_cve_id:
                new_file.write("\n")
            last_cve_id = cve_id
            # Splitting on " " and re-joining with " " reproduces the row unchanged.
            new_file.write(row)
    # Terminate the last instance of this source file.
    new_file.write("\n")

In [None]:
# Flush and close tagged_all.csv before it is read back in the next cell.
new_file.close()

In [None]:
# Read the concatenated corpus as a headerless, space-separated table.
# skip_blank_lines=False keeps the empty separator lines as rows, and
# QUOTE_NONE turns off quote handling for the tokens.
import csv
data_tmp = pd.read_table("tagged_all.csv", sep=" ", encoding="utf-8", quoting=csv.QUOTE_NONE, skip_blank_lines=False, header=None, na_values=["NaN", ""], keep_default_na=False)
# Preview the first 40 rows.
data_tmp[:40]

Calculating where to split the data 

In [None]:
# Fractions of the instances assigned to the dev and test splits; the rest is used for training.
dev_size = 0.1
test_size = 0.1

In [None]:
# Indices of the rows whose third column reads as NaN; these are the
# separator rows between consecutive instances.
third_column = data_tmp.iloc[:, 2].tolist()
items_rows = [row_no for row_no, cell in enumerate(third_column) if str(cell) == "nan"]

# Separator rows at which the train part and the dev part end.
separator_count = len(items_rows)
train_last_row = items_rows[int(separator_count * (1.0 - dev_size - test_size))]
dev_last_row = items_rows[int(separator_count * (1.0 - test_size))]

In [None]:
# Number of separator rows found in the table.
len(items_rows)

Splitting the dataset

In [None]:
# Cut the table at the two separator rows computed above ...
train_set = data_tmp.iloc[:train_last_row]
dev_set = data_tmp.iloc[train_last_row:dev_last_row]
test_set = data_tmp.iloc[dev_last_row:]

# ... and store every part as a headerless, space-separated file.
for split_name, split_frame in (("train", train_set), ("dev", dev_set), ("test", test_set)):
    split_frame.to_csv(split_name + ".csv", sep=" ", index=False, header=False, quoting=csv.QUOTE_NONE)

# Preparation

Name the tag that should be predicted

In [None]:
# Tag that the sequence tagger is trained to recognise.
tag_to_be_predicted = "AI"
# File-name suffix of the training set; "" selects the files without oversampling.
# NOTE(review): the next cell defines a function that is also named `oversample`,
# which rebinds this name until the optional oversampling cell assigns the suffix again.
oversample = ""

In [None]:
def oversample(tag, set, n):
    """Write ``<set>_oversampled.csv`` in which tagged instances occur ``n`` extra times.

    The input ``<set>.csv`` holds one token per row with space-separated
    columns (the tag is the second column); instances are separated by rows
    that contain nothing but spaces. Every instance with at least one row
    whose tag column contains ``tag`` is copied ``n`` times directly behind
    its separator row, each copy followed by a ``"  \\n"`` separator.

    Args:
        tag: substring looked for in the tag column of each row.
        set: path of the input file without the ``.csv`` extension.
        n: number of additional copies per tagged instance.
    """
    with open(set + ".csv", "r") as file:
        data = file.readlines()

    def _copies(instance_rows):
        # One copy is the instance's rows followed by a separator row.
        return ("".join(instance_rows) + "  \n") * n

    current_instance = []
    has_tag = False
    for i, line in enumerate(data):
        if line.strip(" \n") == "":
            # Separator row: the instance collected so far is complete.
            if not line.endswith("\n"):
                data[i] += "\n"
            if has_tag:
                data[i] += _copies(current_instance)
            has_tag = False
            current_instance = []
            continue

        word_line = line.split(" ")
        if len(word_line) > 1 and tag in word_line[1]:
            has_tag = True

        # Keep the complete row so that no column and no newline gets lost.
        current_instance.append(line if line.endswith("\n") else line + "\n")

    # The file may end without a separator row; the last instance must not
    # be skipped in that case.
    if has_tag and n > 0:
        if not data[-1].endswith("\n"):
            data[-1] += "\n"
        data.append("  \n" + _copies(current_instance))

    with open(set + "_oversampled.csv", "w") as file:
        file.writelines(data)

Specify if the dataset examples containing the tag should be oversampled (do not execute if no oversampling should be performed) 

In [None]:
# Write train_oversampled.csv with two extra copies of every instance that contains the tag.
oversample(tag_to_be_predicted, "train", 2)
# From here on `oversample` is the file-name suffix again (it replaces the function object).
oversample = "_oversampled"

# Converting the data

Upload the right train.csv, dev.csv and test.csv

Normal tagging scheme used in the paper

In [None]:
# Normal
def creat_tagging_dataset_for(tag, set):
    """Write ``<set>_tagging_<tag>.csv`` with a binary tag column.

    Reads ``<set>.csv`` (one token per row, space-separated columns, tag in
    the second column). Rows whose tag column contains ``tag`` get exactly
    ``tag`` as their tag, all other rows get ``"O"``. Rows consisting only of
    spaces separate instances and are written as empty lines.

    Args:
        tag: tag to keep; matched as a substring of the tag column.
        set: path of the input file without the ``.csv`` extension.
    """
    with open(set + ".csv", "r") as file, \
            open(set + "_tagging_" + tag + ".csv", "w") as new_file:
        for line in file:
            if line.strip(" \n") == "":
                new_file.write("\n")
                continue

            # Strip the newline first: otherwise it is part of the last column
            # and would be dropped when that column is the tag column.
            word_line = line.rstrip("\n").split(" ")
            if len(word_line) < 2:
                # A row without a tag column is treated as untagged.
                word_line.append("O")
            elif tag in word_line[1]:
                word_line[1] = tag
            else:
                word_line[1] = "O"
            new_file.write(" ".join(word_line) + "\n")

# `oversample` is a file-name suffix only if the optional oversampling cell was
# executed. If that cell was skipped, the name is still bound to the
# oversample() function, which cannot be concatenated to a string; fall back
# to the plain training file in that case.
train_suffix = oversample if isinstance(oversample, str) else ""

creat_tagging_dataset_for(tag_to_be_predicted, "train" + train_suffix)
creat_tagging_dataset_for(tag_to_be_predicted, "dev")
creat_tagging_dataset_for(tag_to_be_predicted, "test")

Uncomment and execute for BIO tagging

In [None]:
# BIO variant of creat_tagging_dataset_for: the first row of a run of tagged
# rows gets "B", the following rows of the run get "I", all other rows get "O".
# The def line is commented out; remove its leading "#" to activate this variant.
# NOTE(review): is_begin is not reset on separator rows, so a tagged run that ends
# one instance and another that starts the next instance are labelled as a single
# run ("I" instead of "B") — confirm whether this is intended.
#def creat_tagging_dataset_for(tag, set):
    new_file = open(set + "_tagging_" + tag + ".csv", "w")
    
    is_begin = True
    with open(set + ".csv", "r") as file:
        for i, line in enumerate(file):
            # Rows made up of spaces only separate instances.
            if line.replace(" ", "") == "\n":
                new_file.write("\n")
                continue

            word_line = line.split(" ")
            
            if tag in word_line[1]:
                word_line[1] = "B" if is_begin else "I"
                is_begin = False
            else:
                word_line[1] = "O"
                is_begin = True
            new_file.write(" ".join(word_line))

    new_file.close()


Uncomment and execute for Dong et al. like tagging

In [None]:
# Variant of creat_tagging_dataset_for that ends every row right after the tag column.
# The def line is commented out; remove its leading "#" to activate this variant.
# NOTE(review): the newline is appended to the tag column before the columns are
# joined, so any columns after the tag are written on a line of their own,
# prefixed by a space — confirm that the consumer of this file expects that.
#def creat_tagging_dataset_for(tag, set):
    new_file = open(set + "_tagging_" + tag + ".csv", "w")

    with open(set + ".csv", "r") as file:
        for i, line in enumerate(file):
            # Rows made up of spaces only separate instances.
            if line.replace(" ", "") == "\n":
                new_file.write("\n")
                continue

            word_line = line.split(" ")
            
            if tag in word_line[1]:
                word_line[1] = tag + "\n"
            else:
                word_line[1] = "O\n"
            new_file.write(" ".join(word_line))

    new_file.close()

Building the model

In [None]:
# `oversample` is a file-name suffix only if the optional oversampling cell was
# executed; otherwise the name is still bound to the oversample() function and
# string concatenation would raise a TypeError. Use the plain training file then.
train_suffix = oversample if isinstance(oversample, str) else ""

# Column 0 of the tagging files is the token text, column 1 its tag.
corpus = ColumnCorpus(Path('./'), {0: 'text', 1: 'tag'},
                                test_file="test" + "_tagging_" + tag_to_be_predicted + ".csv",
                                dev_file="dev" + "_tagging_" + tag_to_be_predicted + ".csv",
                                train_file="train" + train_suffix + "_tagging_" + tag_to_be_predicted + ".csv")

tag_type = "tag"

tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
# Show the first training sentence together with its tags as a sanity check.
print(corpus.train[0].to_tagged_string('tag'))

In [None]:
# Second training sentence with its tags.
print(corpus.train[1].to_tagged_string('tag'))

In [None]:
# Another spot check; index 59 requires at least 60 training sentences.
print(corpus.train[59].to_tagged_string('tag'))

In [None]:
# Number of sentences in the train, dev and test splits.
print(len(corpus.train))
print(len(corpus.dev))
print(len(corpus.test))

In [None]:
# Last sentence of the dev split.
corpus.dev[-1]

In [None]:
# First sentence of the test split.
corpus.test[0]

# CVE Tagger

In [None]:
# Stack forward and backward Flair embeddings with BERT word embeddings
# (created with fine_tune=True) into one embedding.
bert_embedding = TransformerWordEmbeddings('bert-base-cased', fine_tune=True)
word_embeddings = [FlairEmbeddings('news-forward-fast'), FlairEmbeddings('news-backward-fast'), bert_embedding]
embeddings = StackedEmbeddings(embeddings=word_embeddings)
 

# Sequence tagger over the stacked embeddings, with CRF enabled.
tagger = SequenceTagger(hidden_size=256,
                        embeddings=embeddings,
                        tag_dictionary=tag_dictionary,
                        tag_type=tag_type,
                        use_crf=True)

Training the model

In [None]:
# NOTE(review): Adam is only referenced by the commented-out optimizer argument,
# so the trainer is created with its default optimizer.
from torch.optim.adam import Adam
trainer = ModelTrainer(tagger, corpus)#, optimizer=Adam)

In [None]:
#learning_rate_tsv = trainer.find_learning_rate('./')

#from flair.visual.training_curves import Plotter
#plotter = Plotter()
#plotter.plot_learning_rate(learning_rate_tsv)

In [None]:
# NOTE(review): `lr` is not referenced by any later cell of this notebook,
# in particular it is not passed to trainer.train().
lr = 0.1

In [None]:
# Train the tagger for up to 20 epochs with './' as the trainer's base path.
# NOTE(review): no learning rate is passed, so the trainer's default is used;
# param_selection_mode=True is kept as in the original setup — confirm its
# effect on model saving for the installed flair version.
trainer.train('./', max_epochs=20, embeddings_storage_mode="cpu", param_selection_mode=True)

In [None]:
# Evaluate the tagger on the training split and print the detailed report.
result, score = tagger.evaluate(corpus.train, mini_batch_size=32) #, out_path=f"predictions.txt")
print(result.detailed_results)

In [None]:
# Evaluate the tagger on the dev split and print the detailed report.
result, score = tagger.evaluate(corpus.dev, mini_batch_size=32) #, out_path=f"predictions.txt")
print(result.detailed_results)

In [None]:
# Evaluate the tagger on the test split and print the detailed report.
result, score = tagger.evaluate(corpus.test, mini_batch_size=32) #, out_path=f"predictions.txt")
print(result.detailed_results)

Prediction of the whole corpus

In [None]:
import csv
from flair.datasets import ColumnDataset

# Token that this cell expects to_tagged_string() to emit directly after a
# word that was predicted as the tag.
tag_marker = "<" + tag_to_be_predicted + ">"

for part in range(1,3):
    raw_name = "all_cves_" + str(part) + ".csv"
    converted_name = "all_cves_" + str(part) + "_converted.csv"

    # Step 1: copy the rows of this part and insert an empty line whenever the
    # CVE id (second space-separated column) changes. `last_cve_id` starts as
    # None for every part so that the comparison never sees an unassigned or
    # stale value, even when the first rows of the file are skipped.
    last_cve_id = None
    with open(path + "dataset/all_cves_" + str(part) + ".json", "r", encoding="ISO-8859-1") as file, \
            open(raw_name, "w") as new_file:
        for line in file:
            word_line = line.split(" ")
            if len(word_line) < 2:
                # Rows without a CVE id column are dropped.
                continue

            cve_id = word_line[1].replace("\n", "")
            if last_cve_id is not None and last_cve_id != cve_id:
                new_file.write("\n")
            last_cve_id = cve_id

            new_file.write(line)
        new_file.write("\n")

    # Step 2: round-trip through pandas so that the file has the same layout
    # as the files the tagger was trained on.
    cves_tmp = pd.read_table(raw_name, sep=" ", encoding="utf-8", quoting=csv.QUOTE_NONE, skip_blank_lines=False, header=None, na_values=["NaN", ""], keep_default_na=False)
    cves_tmp.to_csv(converted_name, sep=' ', index = False, header = False, quoting=csv.QUOTE_NONE)
    # Drop the reference so the table can be garbage-collected.
    cves_tmp = None

    # Step 3: load the sentences and let the tagger annotate them.
    p = Path('./') / converted_name
    corpus_to_be_labeled = ColumnDataset(p, {0: 'text'})

    tagger.predict(sentences=corpus_to_be_labeled, mini_batch_size=16)

    # Step 4: write one "<word> <tag or O>" row per token, with a "  " row
    # after every sentence.
    with open("all_cves_tagged_" + str(part) + "_" + tag_to_be_predicted + ".csv", "w") as new_file:
        for sentence in corpus_to_be_labeled.sentences:
            tokenized_sentence = sentence.to_tagged_string('tag').split(" ")
            for i, word in enumerate(tokenized_sentence):
                if word == tag_marker:
                    continue
                new_file.write(word)

                # A word is tagged if the marker follows it; the last token of
                # a sentence has no successor and is therefore written as "O".
                is_last = i == len(tokenized_sentence) - 1
                if not is_last and tokenized_sentence[i+1] == tag_marker:
                    new_file.write(" " + tag_to_be_predicted + "\n")
                else:
                    new_file.write(" O\n")
            new_file.write("  \n")

    corpus_to_be_labeled = None

In [None]:
#import csv
#import random

#for part in range(1,3):
#    new_file = open("all_cves_" + str(part) + ".csv", "w")
#    with open(path + "dataset/all_cves_" + str(part) + ".json", "r", encoding="ISO-8859-1") as file:
#        for i, line in enumerate(file):
#            word_line = line.split(" ")
#            if len(word_line) < 2:
#                continue
#                
#            if i != 0 and last_cve_id != word_line[1].replace("\n", ""):
#                new_file.write("\n")
#            last_cve_id = word_line[1].replace("\n", "")
#                
#            new_file.write(" ".join(word_line))
#    new_file.write("\n")
#    new_file.close()
#
#    cves_tmp = pd.read_table("all_cves_" + str(part) + ".csv", sep=" ", encoding="utf-8", quoting=csv.QUOTE_NONE, skip_blank_lines=False, header=None, na_values=["NaN", ""], keep_default_na=False)
#
#    cves_tmp.to_csv("all_cves_" + str(part) + "_converted.csv", sep=' ', index = False, header = False, quoting=csv.QUOTE_NONE)
#   
#    cves_tmp = None
#
#    
#    from flair.datasets import ColumnDataset
#    p = Path('./') / ("all_cves_" + str(part) + "_converted.csv")
#    corpus_to_be_labeled = ColumnDataset(p, {0: 'text'})
#
#    new_file = open("all_cves_tagged_" + str(part) + "_" + tag_to_be_predicted + ".csv", "w")
#    for sentence in corpus_to_be_labeled.sentences:
#        sentence_with_tags = sentence.to_tagged_string('tag')
#        tokenized_sentence = sentence_with_tags.split(" ")
#        for i,word in enumerate(tokenized_sentence):
#            if word == "<" + tag_to_be_predicted + ">":
#                continue
#            new_file.write(word)
#
#            #if i == len(tokenized_sentence) - 1:
#            #    new_file.write(" O\n")
#            #    continue
#
#            if bool(random.getrandbits(1)):
#                new_file.write(" " + tag_to_be_predicted + "\n")
#            else:
#                new_file.write(" O\n")
#        new_file.write("  \n")

#    new_file.close()
#    corpus_to_be_labeled = None

In [None]:
# Copy both tagged output files to the Drive folder given by `path`.
tagged_1_name = "all_cves_tagged_1_" + tag_to_be_predicted +  ".csv"
tagged_2_name = "all_cves_tagged_2_" + tag_to_be_predicted +  ".csv"
!cp "$tagged_1_name" "$path"
!cp "$tagged_2_name" "$path"

In [None]:
# Drop the references to the tagger and its trainer before the classifier is built.
tagger = None
trainer = None

# Value Classifier

## CVSS-Matcher integration

In [None]:
# Same list of file suffixes as in the dataset-splitting section, repeated so
# that this section can be run without it.
years = ["2013", "2014", "2015", "2016", "2017", "2018", "2019", "additional_1", "additional_2"] 

In [None]:
# Concatenate the per-suffix files into tagged_all.csv, writing an empty line
# whenever the CVE id (third space-separated column) changes. The output file
# is managed by `with` so that it is closed even if a source file is missing
# or malformed.
with open("tagged_all.csv", "w") as new_file:
    for year in years:
        # Reset per source file: its first row is never preceded by a separator.
        last_cve_id = None
        with open(path + "dataset/tagged_descriptions_" + year +".json", "r") as file:
            for line in file:
                cve_id = line.split(" ")[2].replace("\n", "")
                if last_cve_id is not None and last_cve_id != cve_id:
                    new_file.write("\n")
                last_cve_id = cve_id

                new_file.write(line)
        # Terminate the last instance of this source file.
        new_file.write("\n")

In [None]:
# Round-trip tagged_all.csv through pandas and store the result as tagged_all_tmp.csv,
# the file that is handed to the matcher below.
import csv
tagged_all_tmp = pd.read_table("tagged_all.csv", sep=" ", encoding="utf-8", quoting=csv.QUOTE_NONE, skip_blank_lines=False, header=None, na_values=["NaN", ""], keep_default_na=False)
tagged_all_tmp.to_csv('tagged_all_tmp.csv', sep=' ', index = False, header = False, quoting=csv.QUOTE_NONE)

Save Matcher in path/matcher

In [None]:
# Copy the converted corpus into the dataset folder on the Drive and make the
# matcher folder the working directory.
matcher_path = path + "matcher/"
dataset_path = path + "dataset/"
!cp tagged_all_tmp.csv "$dataset_path"
%cd "$matcher_path"

In [None]:
# Run cvss_matcher.py from the current working directory with the tag, the
# converted corpus and the output file as arguments.
# NOTE(review): matcher_path is reassigned here but not used by the command below.
matcher_path = path + "matcher/cvss_matcher.py"
output_path = "../dataset/tagged_all_values_" + tag_to_be_predicted + ".csv"
!python "cvss_matcher.py" $tag_to_be_predicted "../dataset/tagged_all_tmp.csv" "$output_path"

In [None]:
# Leave the matcher folder and make /content the working directory again.
%cd 
%cd /content

In [None]:
# Copy the matcher output from the Drive into the current working directory.
train_file = dataset_path + "tagged_all_values_" + tag_to_be_predicted + ".csv"
!cp "$train_file" .

In [None]:
def create_classification_dataset_for(tag, set):
    """Write ``<set>_classification_<tag>.csv`` with one labelled text per instance.

    Reads ``<set>_values_<tag>.csv`` (one token per row, space-separated
    columns, instances separated by rows made up of spaces). Within an
    instance, every row whose second column contains both ``tag`` and a
    ``":"`` contributes its first column to the text; the label is the part
    after the first ``":"`` of the first such row. Each record is written as
    ``"<label> \\t<word> <word> ... "`` followed by an empty line. Instances
    without a matching row produce no output.

    Args:
        tag: tag whose value-annotated tokens are collected.
        set: common prefix of the input and output file names.
    """
    label = ""
    with open(set + "_values_" + tag + ".csv", "r") as file, \
            open(set + "_classification_" + tag + ".csv", "w") as new_file:
        for i, line in enumerate(file):
            if line.replace(" ", "") == "\n":
                if i == 0:
                    continue
                if label != "":
                    # Close the record of the instance that just ended.
                    new_file.write("\n\n")
                label = ""
                continue

            word_line = line.replace("\n", "").split(" ")
            if len(word_line) < 2:
                # No tag column, so the row cannot belong to the tag.
                continue

            tag_column = word_line[1]
            if tag not in tag_column or ":" not in tag_column:
                continue
            if label == "":
                label = tag_column.split(":")[1]
                new_file.write(label + " \t")
            new_file.write(word_line[0] + " ")

        # The input may end without a separator row; terminate the open record.
        if label != "":
            new_file.write("\n\n")

# Reads tagged_all_values_<tag>.csv and writes tagged_all_classification_<tag>.csv.
create_classification_dataset_for(tag_to_be_predicted, "tagged_all")

In [None]:
# Fractions of every class that go to the dev and test splits of the classifier data.
dev_size = 0.1
test_size = 0.1

In [None]:
def split_dataset(set, dev_fraction=None, test_fraction=None, tag=None):
    """Split ``<set>.csv`` per class into train, dev and test files.

    Every non-blank line is one record; its class is the text before the
    first space. For each class the records are cut, in file order, into a
    leading train part, a dev part and a trailing test part. The parts of all
    classes are written to ``train_classification_<tag>.csv``,
    ``dev_classification_<tag>.csv`` and ``test_classification_<tag>.csv``
    in the current directory, each record followed by an empty line.

    Args:
        set: path of the input file without the ``.csv`` extension.
        dev_fraction: share of each class used for dev; defaults to the
            notebook-level ``dev_size``.
        test_fraction: share of each class used for test; defaults to the
            notebook-level ``test_size``.
        tag: tag used in the output file names; defaults to the
            notebook-level ``tag_to_be_predicted``.
    """
    if dev_fraction is None:
        dev_fraction = dev_size
    if test_fraction is None:
        test_fraction = test_size
    if tag is None:
        tag = tag_to_be_predicted

    class_dict = {}
    with open(set + ".csv", "r") as file:
        for line in file:
            if line.replace(" ", "") == "\n":
                continue
            class_dict.setdefault(line.split(" ")[0], []).append(line)

    splits = {"train": [], "dev": [], "test": []}
    for value_data in class_dict.values():
        count = len(value_data)
        train_end = int(count * (1.0 - dev_fraction - test_fraction))
        # The dev part ends where the test part starts, i.e. test_fraction
        # before the end (not dev_fraction, which only gives the same result
        # when both fractions are equal).
        dev_end = int(count * (1.0 - test_fraction))
        splits["train"].extend(value_data[:train_end])
        splits["dev"].extend(value_data[train_end:dev_end])
        splits["test"].extend(value_data[dev_end:])

    for split_name, entities in splits.items():
        with open(split_name + "_classification_" + tag + ".csv", "w") as new_file:
            for entity in entities:
                new_file.write(entity)
                new_file.write("\n")

# Split tagged_all_classification_<tag>.csv into the train/dev/test classification files.
split_dataset("tagged_all_classification_" + tag_to_be_predicted)

In [None]:
# File-name suffix of the classifier training set; "" selects the file without oversampling.
oversample = ""

In [None]:
def oversample_classification(class_to_be_oversampled, set, n):
    """Write ``<set>_oversampled.csv`` with records of one class repeated.

    A line of ``<set>.csv`` belongs to the class when the text before its
    first space contains ``class_to_be_oversampled``. Each such line is
    followed by ``n`` further copies of itself, every copy preceded by an
    empty line; all other lines are copied unchanged.
    """
    with open(set + ".csv", "r") as source:
        rows = source.readlines()

    expanded = []
    for row in rows:
        first_column = row.split(" ")[0]
        if class_to_be_oversampled in first_column:
            expanded.append(row + ("\n" + row) * n)
        else:
            expanded.append(row)

    with open(set + "_oversampled.csv", "w") as target:
        target.writelines(expanded)

Oversampling

In [None]:
# Add 15 extra copies of every training record whose class contains "H" and
# switch the file-name suffix to the oversampled training file.
oversample_classification("H", "train_classification_" + tag_to_be_predicted, 15)
oversample = "_oversampled"

Corpus creation

In [None]:
# Tab-separated classification files: column 0 is the label, column 1 the text.
corpus = CSVClassificationCorpus(Path('./'), {1: 'text', 0: 'label'}, delimiter= "\t", 
                                test_file="test" + "_classification_" + tag_to_be_predicted + ".csv",
                                dev_file="dev" + "_classification_" + tag_to_be_predicted + ".csv",
                                train_file="train" + "_classification_" + tag_to_be_predicted + oversample + ".csv", quoting=csv.QUOTE_NONE)

# Spot check of one training record.
print(corpus.train[2])

In [None]:
# Number of records in the train, dev and test splits.
print(len(corpus.train))
print(len(corpus.dev))
print(len(corpus.test))

In [None]:
# Same word embeddings as for the tagger, combined into a document embedding
# by DocumentLSTMEmbeddings; the classifier predicts a single label per text.
bert_embedding = TransformerWordEmbeddings('bert-base-cased', fine_tune=True)
word_embeddings = [FlairEmbeddings('news-forward-fast'), FlairEmbeddings('news-backward-fast'), bert_embedding]
document_embeddings = DocumentLSTMEmbeddings(word_embeddings, hidden_size=512, reproject_words=True, reproject_words_dimension=256)
classifier = TextClassifier(document_embeddings, label_dictionary=corpus.make_label_dictionary(), multi_label=False)

In [None]:
#learning_rate_tsv = trainer.find_learning_rate('./')

#from flair.visual.training_curves import Plotter
#plotter = Plotter()
#plotter.plot_learning_rate(learning_rate_tsv)

In [None]:
# Train the classifier for up to 20 epochs with './' as the trainer's base path.
trainer = ModelTrainer(classifier, corpus)
trainer.train('./', max_epochs=20, embeddings_storage_mode="cpu")

In [None]:
# Evaluate the classifier on the training split and print the detailed report.
result, score = classifier.evaluate(corpus.train, mini_batch_size=32) #, out_path=f"predictions.txt")
print(result.detailed_results)

In [None]:
# Evaluate the classifier on the dev split and print the detailed report.
result, score = classifier.evaluate(corpus.dev, mini_batch_size=32) #, out_path=f"predictions.txt")
print(result.detailed_results)

In [None]:
# NOTE(review): this cell repeats the dev-split evaluation of the previous cell.
result, score = classifier.evaluate(corpus.dev, mini_batch_size=32) #, out_path=f"predictions.txt")
print(result.detailed_results)

In [None]:
# Evaluate the classifier on the test split and print the detailed report.
result, score = classifier.evaluate(corpus.test, mini_batch_size=32) #, out_path=f"predictions.txt")
print(result.detailed_results)

Predict the values of the whole corpus

In [None]:
def create_unlabeled_classification_dataset_for(tag, set):
    """Write ``<set>_classification_<tag>.csv`` with placeholder labels.

    Reads ``<set>_<tag>.csv`` (one ``"<word> <tag>"`` row per token, instances
    separated by rows made up of spaces). For every instance that has at
    least one row whose second column contains ``tag``, one record
    ``"X \\t<word> <word> ... \\t"`` followed by an empty line is written;
    ``"X"`` is a placeholder label. Instances without such a row produce no
    output.

    Args:
        tag: tag whose tokens are collected.
        set: common prefix of the input and output file names.
    """
    label = ""
    with open(set + "_" + tag + ".csv", "r") as file, \
            open(set + "_classification_" + tag + ".csv", "w") as new_file:
        for i, line in enumerate(file):
            if line.replace(" ", "") == "\n":
                if i == 0:
                    continue
                if label != "":
                    # Close the record of the instance that just ended.
                    new_file.write("\t\n\n")
                label = ""
                continue

            word_line = line.replace("\n", "").split(" ")
            if len(word_line) < 2:
                # Report the malformed row and skip it instead of failing on
                # the missing tag column.
                print(word_line)
                continue

            if tag in word_line[1]:
                if label == "":
                    label = "X"
                    new_file.write(label + " \t")
                new_file.write(word_line[0] + " ")

# Classify the tagged tokens of both corpus parts and merge the predicted
# values back into the per-token files as "<word> <tag>:<value>" rows.
i = 0
for part in range(1,3):
    # Writes all_cves_tagged_<part>_classification_<tag>.csv.
    create_unlabeled_classification_dataset_for(tag_to_be_predicted, "all_cves_tagged_" + str(part))


    from flair.datasets import CSVClassificationDataset
    name = "all_cves_tagged_" + str(part) + "_classification_" + tag_to_be_predicted + ".csv"
    corpus_to_be_classified = CSVClassificationDataset(Path('.') / name, {0: 'label', 1: 'text'}, delimiter= "\t", quoting=csv.QUOTE_NONE)


    classifier.predict(sentences=corpus_to_be_classified, mini_batch_size=1)
        
        
    new_file = open("all_cves_tagged_classified_" + str(part) + "_" + tag_to_be_predicted + ".csv", "w")
    j = 0
    with open("all_cves_tagged_" + str(part) + "_" + tag_to_be_predicted + ".csv", "r") as file:
        # Walk through the classified records and, for each of their words,
        # advance through the per-token file until that word is found.
        for i in range(len(corpus_to_be_classified)):
            for word in corpus_to_be_classified.sentences[i].to_plain_string().split(" "):
                # `file` is a single iterator, so this loop resumes after the
                # row at which the previous word was matched.
                for j, line in enumerate(file):
                    if line.replace(" ", "") == "\n":
                        new_file.write(line)
                        continue
                        
                    # Requires exactly two space-separated fields per row.
                    ref_word, tag = line.replace("\n", "").split(" ")

                    new_file.write(ref_word)
                    new_file.write(" " + tag)


                    if word.replace(" ", "") == ref_word.replace(" ", "") and tag.replace(" ", "") == tag_to_be_predicted:
                        new_file.write(":")
                        # NOTE(review): index 1 presumably skips the placeholder label read
                        # from the file and selects the predicted one — confirm for the
                        # installed flair version.
                        new_file.write(corpus_to_be_classified.sentences[i].get_label_names()[1].replace(" ", ""))
                        new_file.write("\n")
                        break
                    new_file.write("\n")

        # Copy the rows that remain after the last matched word.
        for line in file:
            if line.replace(" ", "") == "\n":
                    new_file.write(line)
                    continue
            ref_word, tag = line.replace("\n", "").split(" ")
            new_file.write(ref_word)
            new_file.write(" " + tag)
            new_file.write("\n")

    new_file.close()

In [None]:
# Copy both classified output files to the Drive folder given by `path`.
tagged_classified_1_name = "all_cves_tagged_classified_1_" + tag_to_be_predicted + ".csv"
tagged_classified_2_name = "all_cves_tagged_classified_2_" + tag_to_be_predicted + ".csv"
!cp "$tagged_classified_1_name" "$path"
!cp "$tagged_classified_2_name" "$path"

In [None]:
# Drop the references to the classifier and its trainer.
classifier = None
trainer = None

In [None]:
# NOTE(review): presumably placed here so that "Run all" stops before the
# alternative cells below — confirm.
exit()

In [None]:
def create_unlabeled_classification_dataset_for(tag, set):
    """Collect the tagged words of every instance under the placeholder label "X".

    Input is ``<set>_<tag>.csv`` (``"<word> <tag>"`` rows, instances separated
    by rows made up of spaces); output is ``<set>_classification_<tag>.csv``
    with one ``"X \\t<word> <word> ... \\t"`` record plus an empty line for
    each instance that has a row whose second column contains ``tag``.
    """
    source_name = set + "_" + tag + ".csv"
    target_name = set + "_classification_" + tag + ".csv"

    target = open(target_name, "w")
    label = ""
    with open(source_name, "r") as source:
        for row_no, row in enumerate(source):
            if row.replace(" ", "") == "\n":
                # Separator row; a leading one at the top of the file is ignored.
                if row_no == 0:
                    continue
                if label != "":
                    target.write("\t")
                    target.write("\n")
                    target.write("\n")
                label = ""
                continue

            columns = row.replace("\n", "").split(" ")
            if tag not in columns[1]:
                continue
            if label == "":
                # First tagged word of the instance opens the record.
                label = "X"
                target.write(label + " \t")
            target.write(columns[0] + " ")

    target.close()

# Variant of the merge cell above that does not call the classifier: every
# matched tagged word gets the constant value "N".
i = 0
for part in range(1,3):
    create_unlabeled_classification_dataset_for(tag_to_be_predicted, "all_cves_tagged_" + str(part))


    from flair.datasets import CSVClassificationDataset
    name = "all_cves_tagged_" + str(part) + "_classification_" + tag_to_be_predicted + ".csv"
    corpus_to_be_classified = CSVClassificationDataset(Path('.') / name, {0: 'label', 1: 'text'}, delimiter= "\t", quoting=csv.QUOTE_NONE)

    new_file = open("all_cves_tagged_classified_" + str(part) + "_" + tag_to_be_predicted + ".csv", "w")
    j = 0
    with open("all_cves_tagged_" + str(part) + "_" + tag_to_be_predicted + ".csv", "r") as file:
        for i in range(len(corpus_to_be_classified)):
            for word in corpus_to_be_classified.sentences[i].to_plain_string().split(" "):
                # `file` is a single iterator, so this loop resumes after the
                # row at which the previous word was matched.
                for j, line in enumerate(file):
                    if line.replace(" ", "") == "\n":
                        new_file.write(line)
                        continue
                        
                    # Requires exactly two space-separated fields per row.
                    ref_word, tag = line.replace("\n", "").split(" ")

                    #if i == 159 and j< 100:
                    #    print(ref_word)
                    #    print(word)
                        
                    #if j > 20000:
                    #    print(word)
                    #    word_not_found_error
                        
                    new_file.write(ref_word)
                    new_file.write(" " + tag)


                    if word.replace(" ", "") == ref_word.replace(" ", "") and tag.replace(" ", "") == tag_to_be_predicted:
                        new_file.write(":")
                        new_file.write("N")
                        new_file.write("\n")
                        break
                    new_file.write("\n")

        # Copy the rows that remain after the last matched word.
        for line in file:
            if line.replace(" ", "") == "\n":
                    new_file.write(line)
                    continue
            ref_word, tag = line.replace("\n", "").split(" ")
            new_file.write(ref_word)
            new_file.write(" " + tag)
            new_file.write("\n")

    new_file.close()