# This notebook stores the classes that handle the logic of the experiments.

This notebook contains these classes:

- **CExperimentResult** handles exporting experiment results into tables and figures
- **CExperiment** handles the logic of the experiments. Performs text processing and feature extraction and calls the classification models
- **CDataSet** loads a dataset into memory and creates instances of the classes above to perform the experiments and save the results.

In [None]:
run ./Models.ipynb

In [None]:
run ./Text_Preprocessing.ipynb

In [None]:
# Basic constants related to importing the dataset, splitting the data and drawing figures.
# Name of the dataset column that CExperiment.split_data reads as the target values (y).
RELIABILITY = 'label'
# Name of the dataset column that CExperiment.split_data reads as the input texts (X).
CONTENT = 'article'
# Passed as random_state to train_test_split, so every split of the same data is identical.
RD_SEED = 333
# Passed as test_size to train_test_split.
TEST_RATIO = 0.2
# Tick labels used on both axes of the confusion-matrix heatmap.
CM_LABELS = ['fake news','reliable']

In [None]:
import os

import seaborn as sns

In [None]:
class CExperimentResult:
    '''
    Stores, evaluates and exports the results of all experiments run with one model.

    Attributes
    ----------
    dataset_name: string
            Name of the dataset which is used in the experiments.
    dataset_path: string
            Folder that holds the ``tables`` and ``figures`` sub-folders the results are written to.
    log_path: string
            Path to the log file into which the progress of the experiments is appended.
    current_model: string
            Name of the model whose results are being saved.
    results_per_model: dictionary
            Maps the model name to a DataFrame with one row of averaged metrics per preprocessing.

    Methods
    -------
    export_results_to_latex(model:str)
        Exports the results table to a LaTeX file.
        Returns: the row with the highest accuracy as a list.
    evaluate_concrete_experiment(ypredictions:list[np.array],ytest:list[np.array], preprocessing:str)
        Computes the metrics and the averaged confusion matrix of one preprocessing, logs and stores them.
        Returns: None
    add_experiment_results(preprocessing:str, ytestdata:list[np.array],ypredictions:list[np.array])
        Validates the inputs and passes them to the evaluation.
        Returns: None
    '''

    def __init__(self, dataset_name:str, model_name:str, dataset_path:str, log_path:str) -> None:
        '''
        Creates an empty result table for the given model.

        Parameters
        ----------
        dataset_name: string
                Name of the dataset which is used in the experiments.
        model_name: string
                Name of the machine learning or deep learning model used in the experiments.
        dataset_path: string
                Folder that holds the ``tables`` and ``figures`` sub-folders the results are written to.
        log_path: string
                Path to the log file into which the progress of all experiments is saved.

        Returns
        -------
        None
        '''
        self.dataset_name = dataset_name
        self.dataset_path = dataset_path
        self.log_path = log_path
        self.current_model = model_name
        self.results_per_model = {
            model_name: pd.DataFrame(columns=['Preprocessing','Accuracy', 'Precision', 'Recall', 'F1-Score'])
        }

    def export_results_to_latex(self, model:str) -> list:
        '''
        Exports the results of the current model to a LaTeX table and prints the best preprocessings.

        Parameters
        ----------
        model: string
                Model name used as the prefix of the exported file name.

        Returns
        -------
        list
                Values of the result row with the highest accuracy
                (preprocessing, accuracy, precision, recall, f1-score).
        '''
        df = self.results_per_model[self.current_model]
        latex_table = df.to_latex(index=False, escape=False)
        # some minor changes in latex appearance
        latex_table = latex_table.replace('\\toprule', '')
        latex_table = latex_table.replace('\\bottomrule', '')
        latex_table = latex_table.replace('\\midrule', '\\midrule \\midrule')
        latex_table = latex_table.replace('{lllll}', '{l||c|c|c|c}')

        # The metrics are stored as formatted strings, so convert before comparing.
        acc_max = df['Accuracy'].astype(float).idxmax()
        f1_max = df['F1-Score'].astype(float).idxmax()

        print('Preprocessing with highest accuracy: ' + str(df.at[acc_max, 'Preprocessing']))
        print('Preprocessing with highest f1-score: ' + str(df.at[f1_max, 'Preprocessing']))
        print("")

        # os.path.join works whether or not dataset_path ends with a separator.
        table_path = os.path.join(self.dataset_path, 'tables', model + '_experiments_results.txt')
        with open(table_path, 'w') as f:
            f.write(latex_table)
        return list(df.loc[acc_max])

    def _save_con_matrix(self, cm:np.array, preprocessing:str) -> None:
        '''
        Draws the confusion matrix as a heatmap and saves it to the ``figures`` folder.

        Parameters
        ----------
        cm: numpy.array
                Computed confusion matrix.
        preprocessing: string
                Text that describes the preprocessing; it is part of the file name.

        Returns
        -------
        None
        '''
        plt.ioff()
        plt.cla()
        plt.close()

        cm_path = os.path.join(self.dataset_path, 'figures',
                               self.current_model + ' ' + preprocessing + '.jpg')
        plt.figure(figsize=(8,6))
        ax = sns.heatmap(cm, annot=True, cmap='Blues', fmt='.0f',
                         xticklabels=CM_LABELS, yticklabels=CM_LABELS)
        ax.set_yticklabels(ax.get_yticklabels(), rotation=0)
        plt.xlabel('Predicted label')
        plt.ylabel('True label')
        plt.savefig(cm_path)

        plt.cla()
        plt.close()

    def evaluate_concrete_experiment(self, ypredictions:list[np.array], ytest:list[np.array], preprocessing:str) -> None:
        '''
        Evaluates all runs of one preprocessing. Computes the evaluation metrics of every run and
        the confusion matrix averaged over the runs, appends them to the log file, stores the
        averaged metrics in the result DataFrame and saves the confusion matrix figure.

        Parameters
        ----------
        ypredictions: list
                Predictions computed by the classification model, one array per run.
        ytest: list
                Original test labels, one array per run.
        preprocessing: string
                Text that describes the preprocessing used in the experiment.

        Raises
        ------
        ValueError
                If the number of runs differs between ``ytest`` and ``ypredictions``
                or if there are no runs at all.

        Returns
        -------
        None
        '''
        if len(ytest) != len(ypredictions):
            raise ValueError('Different sizes of test and prediction')
        if len(ypredictions) == 0:
            raise ValueError('No experiment runs to evaluate')

        per_run_metrics = []
        cm = np.zeros((2,2))
        for y_true, y_pred in zip(ytest, ypredictions):
            # The true labels go first: precision and recall are not symmetric,
            # so swapping the arguments would swap the two reported values.
            per_run_metrics.append([
                metrics.accuracy_score(y_true, y_pred),
                metrics.precision_score(y_true, y_pred),
                metrics.recall_score(y_true, y_pred),
                metrics.f1_score(y_true, y_pred),
            ])
            cm += confusion_matrix(y_true, y_pred)
        cm = cm / len(ypredictions)

        mean_metrics = ['%.3f' % v for v in np.mean(per_run_metrics, axis=0).tolist()]

        with open(self.log_path, 'a') as f:
            f.write(preprocessing + '\n')
            for run in per_run_metrics:
                f.write(''.join('%.3f' % v + '\t' for v in run) + '\n')
            f.write('-------------------------------\n')
            f.write(''.join(v + '\t' for v in mean_metrics) + '\n')

        results = self.results_per_model[self.current_model]
        results.loc[len(results)] = [preprocessing] + mean_metrics
        self._save_con_matrix(cm, preprocessing)

    def add_experiment_results(self, preprocessing:str, ytestdata:list[np.array], ypredictions:list[np.array]) -> None:
        '''
        Tests whether the number of prediction arrays equals the number of test arrays
        and passes the data to the evaluation.

        Parameters
        ----------
        preprocessing: string
                Text that describes the preprocessing used in the experiment.
        ytestdata: list
                Original test labels, one array per run.
        ypredictions: list
                Predictions computed by the classification model, one array per run.

        Raises
        ------
        ValueError
            If the number of test arrays and the number of prediction arrays differ,
            or if both are empty.

        Returns
        -------
        None
        '''
        if len(ytestdata) != len(ypredictions):
            raise ValueError('Different sizes of test and prediction')

        self.evaluate_concrete_experiment(ypredictions, ytestdata, preprocessing)
    


In [None]:
class CExperiment:
    '''
    Performs a set of experiments with the given dataset, including the text preprocessing.

    Attributes
    ----------
    dataset: pandas.DataFrame
            Dataset of news articles that is being experimented with.
    dataset_name: string
            Name of the dataset that is being experimented with.
    log_path: string
            Path of the log file of the dataset.
    dataset_folder: string
            Relative path to all files and folders related to the dataset.
    language: string
            Language in which the articles are written.
    EMBEDDING_DIM: int
            Size of the embedding vectors (100 for 'en', 300 otherwise).
    pretrained_embedding: dictionary
            Pretrained embedding for the given language.
    preprocessing_function_dict: dictionary
            Maps the name of a preprocessing step to its function.
    ML_model_function_dict: dictionary
            Maps the name of a machine learning model to its class.
    DL_model_function_dict: dictionary
            Maps the name of a deep learning model to its class.
    total_experiments: int
            The number of times each experiment is repeated before the results are
            evaluated. Default value is 1.

    Methods
    -------
    split_data(given_dataset:pd.DataFrame)
            Splits the given dataset into a train and a test part.
            Returns: Xtrain, Xtest, ytrain, ytest
    refresh_preprocessing(model:str)
            Creates the lists of possible preprocessing steps for the given model.
            Returns: text normalizations, n-gram steps, feature models
    apply_feature_model(Xtrain:pd.Series,Xtest:pd.Series,fm:str,ngram_size:int)
            Applies the chosen feature extraction model to the train and test set.
            Returns: Processed train and test data.
    apply_embedding(Xtrain:pd.Series,Xtest:pd.Series,fm:str)
            Applies a word embedding to the train and test set.
            Returns: Processed train and test data.
    preprocess_DL(Xtrain:pd.Series, Xtest:pd.Series, text_norm:str, fm:str, max_len:int)
            Transforms the input data to the form used by the deep learning models.
            Returns: Transformed train and test data and the embedding layer.
    preprocess(Xtrain:pd.Series, Xtest:pd.Series, text_norm:str, ngram_size:int, fm:str, max_len, embedding:bool)
            Transforms the input data to the form used by the machine learning models.
            Returns: Transformed train and test data.
    experiment(model:str, experiment_result:CExperimentResult, max_len:int)
            Performs the machine learning experiments with all preprocessing combinations.
            Returns: Measured average time of one run.
    experiment_DL(model:str, experiment_result:CExperimentResult, max_len:int)
            Performs the deep learning experiments with all preprocessing combinations.
            Returns: Measured average time of one run.
    '''

    def __init__(self, dataset:pd.DataFrame, log_path:str, dataset_folder:str, dataset_name:str,
                 language:str='en', total_experiments:int=1) -> None:
        '''
        Stores the dataset and sets up the lookup tables of preprocessing functions and models.

        Parameters
        ----------
        dataset: pandas.DataFrame
                Dataset of news articles that is being experimented with.
        log_path: string
                Path of the log file of the dataset.
        dataset_folder: string
                Relative path to all files and folders related to the dataset.
        dataset_name: string
                Name of the dataset that is being experimented with.
        language: string
                Language in which the articles are written.
        total_experiments: int
                Number of times each experiment is repeated. Default value is 1.

        Raises
        ------
        ValueError
                If total_experiments is lower than 1.

        Returns
        -------
        None
        '''
        if total_experiments < 1:
            raise ValueError('total_experiments must be at least 1')

        self.dataset = dataset
        self.dataset_name = dataset_name
        self.preprocessing_function_dict = {
            'stemming': stem,
            'lemmatization': lemmatize,
            'no': idenity_function_text_tn,
            'tokenize': tokenize,
            'bag-of-words': create_BOW,
            'tf–idf': create_tf_idf,
            'Word2Vec': create_word2vec,
            'GloVe': idenity_function,
        }
        self.ML_model_function_dict = {'Naive Bayes': CGausian_Naive_Bayes, 'Random Forest': CRandom_Forest}
        self.DL_model_function_dict = {'LSTM': CLSTM, 'CNN': CCNN}
        self.language = language
        self.log_path = log_path
        self.dataset_folder = dataset_folder
        self.EMBEDDING_DIM = 100 if language == 'en' else 300
        self.pretrained_embedding = EN_GL_EMBEDDING_IDX if language == 'en' else CS_GL_EMBEDDING_IDX
        self.total_experiments = total_experiments

    def split_data(self, given_dataset:pd.DataFrame) -> tuple:
        '''
        Splits the given dataset into a train and a test part.

        The split uses the fixed seed RD_SEED, so repeated calls on the same data
        return the same split.

        Parameters
        ----------
        given_dataset: pandas.DataFrame
                Whole dataset of articles and reliabilities.

        Returns
        -------
        tuple
            Xtrain, Xtest, ytrain, ytest.
        '''
        Xdata = given_dataset[CONTENT]
        ydata = given_dataset[RELIABILITY]
        return tuple(train_test_split(Xdata, ydata, test_size=TEST_RATIO, random_state=RD_SEED))

    def refresh_preprocessing(self, model:str) -> tuple[list[str], list[str], list[str]]:
        '''
        Creates the lists of possible preprocessing steps for the given model.

        Parameters
        ----------
        model: string
                Name of the model that is being used for experimenting.

        Returns
        -------
        tuple of three lists of strings
            Text normalizations, n-gram steps and feature models, in this order.
        '''
        is_ml_model = model in ['Naive Bayes', 'Random Forest']

        text_normalization = ['stemming', 'lemmatization']
        if not is_ml_model:
            # Models other than the two ML models are also run without text normalization.
            text_normalization.insert(0, 'no')

        ngrams = ['tokenize'] * 3

        feature_models = ['Word2Vec', 'GloVe']
        if is_ml_model:
            feature_models = ['bag-of-words', 'tf–idf'] + feature_models

        return text_normalization, ngrams, feature_models

    def apply_feature_model(self, Xtrain:pd.Series, Xtest:pd.Series, fm:str, ngram_size:int) -> tuple:
        '''
        Applies the chosen feature extraction model to the train and test set.

        Parameters
        ----------
        Xtrain:pd.Series
                Preprocessed train data.
        Xtest:pd.Series
                Preprocessed test data.
        fm:str
                Feature model chosen for this experiment.
        ngram_size:int
                Size of the n-grams the text was split into.

        Returns
        -------
        tuple
                Train and test data transformed by the feature model.
        '''
        # The vectorizer is created from the train data only and then reused for the test data.
        Xtrain, vectorizer = self.preprocessing_function_dict[fm](Xtrain, (ngram_size, ngram_size))
        Xtest = apply_vectorizer(Xtest, vectorizer)
        return Xtrain, Xtest

    def apply_embedding(self, Xtrain:pd.Series, Xtest:pd.Series, fm:str) -> tuple:
        '''
        Applies a word embedding to the train and test set.

        Parameters
        ----------
        Xtrain:pd.Series
                Preprocessed train data.
        Xtest:pd.Series
                Preprocessed test data.
        fm:str
                Name of the word embedding used in this experiment.

        Returns
        -------
        tuple
                Train and test data transformed by the word embedding.
        '''
        emb_vectors = self.preprocessing_function_dict[fm](Xtrain, self.EMBEDDING_DIM)
        # Only Word2Vec uses the result created from the train data;
        # every other name falls back to the pretrained embedding.
        emb_vectors = emb_vectors.wv if fm == 'Word2Vec' else self.pretrained_embedding

        Xtrain = transform_to_vec(Xtrain, emb_vectors, self.EMBEDDING_DIM)
        Xtest = transform_to_vec(Xtest, emb_vectors, self.EMBEDDING_DIM)
        return Xtrain, Xtest

    def _clean_articles(self, articles:pd.Series, max_len:int) -> pd.Series:
        '''
        Removes special characters and stop words and truncates the articles to max_len.
        '''
        return truncate_articles(
            remove_stop_words(remove_special_characters(articles), self.language), max_len)

    def preprocess_DL(self, Xtrain:pd.Series, Xtest:pd.Series, text_norm:str, fm:str,
                      max_len:int) -> 'tuple[pd.Series, pd.Series, keras.layers.Embedding]':
        '''
        Transforms the input data to the form used by the deep learning models.

        Parameters
        ----------
        Xtrain:pd.Series
                Raw train data.
        Xtest:pd.Series
                Raw test data.
        text_norm: str
                Name of the text normalization that is being applied.
        fm: str
                Name of the feature extraction that is being applied.
        max_len:int
                Maximal number of words in an article after stop-word removal.

        Returns
        -------
        tuple
                Transformed train data, transformed test data and the embedding layer
                that is passed to the neural network model.
        '''
        Xtrain = self._clean_articles(Xtrain, max_len)
        Xtest = self._clean_articles(Xtest, max_len)
        Xtrain = self.preprocessing_function_dict[text_norm](Xtrain, self.language, True)
        Xtest = self.preprocessing_function_dict[text_norm](Xtest, self.language, True)

        # The vectorizer is created from the train data only.
        vectorizer = create_vectorizer(Xtrain)

        if fm == 'GloVe':
            embedding_layer = create_GloVeWE_layer(vectorizer, self.pretrained_embedding, max_len, self.EMBEDDING_DIM)
        else:
            embedding_layer = create_W2V_layer(Xtrain, vectorizer, max_len, self.EMBEDDING_DIM)

        Xtrain = vectorize_articles(Xtrain, vectorizer)
        Xtest = vectorize_articles(Xtest, vectorizer)
        return Xtrain, Xtest, embedding_layer

    def preprocess(self, Xtrain:pd.Series, Xtest:pd.Series, text_norm:str, ngram_size:int, fm:str,
                   max_len, embedding:bool=False) -> tuple:
        '''
        Transforms the input data to the form used by the machine learning models.

        Parameters
        ----------
        Xtrain:pd.Series
                Raw train data.
        Xtest:pd.Series
                Raw test data.
        text_norm: str
                Name of the text normalization that is being applied.
        ngram_size: int
                Size of the n-grams into which the text is split.
        fm: str
                Name of the feature extraction that is being applied.
        max_len:int
                Maximal number of words in an article after stop-word removal.
        embedding: bool
                Marker that states whether the text is processed with a word embedding.

        Returns
        -------
        tuple
                Transformed train and test data.
        '''
        def prepare_for_embedding(articles):
            articles = self._clean_articles(articles, max_len)
            # The text normalization is skipped for GloVe.
            if fm != 'GloVe':
                articles = self.preprocessing_function_dict[text_norm](articles, self.language, True)
            return articles

        def prepare_for_feature_model(articles):
            articles = tokenize(self._clean_articles(articles, max_len), ngram_size)
            return self.preprocessing_function_dict[text_norm](articles, self.language, False)

        if embedding:
            Xtrain = prepare_for_embedding(Xtrain)
            Xtest = prepare_for_embedding(Xtest)
            return self.apply_embedding(Xtrain, Xtest, fm)

        Xtrain = prepare_for_feature_model(Xtrain)
        Xtest = prepare_for_feature_model(Xtest)
        return self.apply_feature_model(Xtrain, Xtest, fm, ngram_size)

    def experiment(self, model:str, experiment_result:'CExperimentResult', max_len:int) -> float:
        '''
        Performs the machine learning experiments with all preprocessing combinations.

        Parameters
        ----------
        model:str
                Name of the classification model that is used for the experiment.
        experiment_result:CExperimentResult
                Object that stores the results of every experiment.
        max_len:int
                Maximal allowed length of an article.

        Returns
        -------
        float
                Average time in seconds of one run (split, preprocessing, model construction and prediction).
        '''
        text_normalization, _, feature_models = self.refresh_preprocessing(model)

        with open(self.log_path, 'a') as f:
            f.write(model + '\n')
            f.write('\n')

        time_measuring = []
        for tn in text_normalization:
            for fm in feature_models:
                for ng_size in range(1, 4):
                    preprocessing_text = tn + ' ' + str(ng_size) + '-grams' + ' ' + fm
                    embedding = fm in ['Word2Vec', 'GloVe']
                    if embedding:
                        # Embeddings are run only once per combination (on the last n-gram
                        # iteration), and GloVe only once overall, because preprocess()
                        # skips the text normalization for it.
                        if ng_size < 3 or (fm == 'GloVe' and tn != 'lemmatization'):
                            continue
                        preprocessing_text = tn + ' ' + fm if fm != 'GloVe' else 'pretrained ' + fm

                    y_predictions = []
                    y_test_data = []
                    print(preprocessing_text)
                    for _ in range(self.total_experiments):
                        start = time.time()
                        Xtrain, Xtest, ytrain, ytest = self.split_data(self.dataset)
                        Xtrain, Xtest = self.preprocess(Xtrain, Xtest, tn, ng_size, fm, max_len, embedding)
                        ml = self.ML_model_function_dict[model](Xtrain, Xtest, ytrain)
                        y_test_data.append(ytest.to_numpy())
                        y_predictions.append(ml.make_prediciton())
                        time_measuring.append(time.time() - start)
                    experiment_result.add_experiment_results(preprocessing_text, y_test_data, y_predictions)

        return np.mean(time_measuring)

    def experiment_DL(self, model:str, experiment_result:'CExperimentResult', max_len:int) -> float:
        '''
        Performs all experiments with a deep learning model, measures the time of each run
        and saves the results.

        Parameters
        ----------
        model:str
                Name of the classification model that is used for the experiment.
        experiment_result:CExperimentResult
                Object that stores the results of every experiment.
        max_len:int
                Maximal allowed length of an article.

        Returns
        -------
        float
                Average time in seconds of one run (split, preprocessing, model construction and prediction).
        '''
        text_normalization, _, feature_models = self.refresh_preprocessing(model)

        with open(self.log_path, 'a') as f:
            f.write(model + '\n')
            f.write('\n')

        time_measuring = []
        for fm in feature_models:
            for tn in text_normalization:
                preprocessing_text = tn + ' ' + fm if tn != 'no' else fm
                if fm == 'GloVe':
                    # GloVe is run only for the first normalization ('no').
                    if tn != 'no':
                        break
                    preprocessing_text = 'pretrained ' + fm

                y_predictions = []
                y_test_data = []
                print(preprocessing_text)

                for _ in range(self.total_experiments):
                    start = time.time()
                    Xtrain, Xtest, ytrain, ytest = self.split_data(self.dataset)
                    Xtrain, Xtest, embedding_layer = self.preprocess_DL(Xtrain, Xtest, tn, fm, max_len)
                    dl = self.DL_model_function_dict[model](Xtrain, Xtest, ytrain, embedding_layer, max_len,
                                                            preprocessing_text, self.log_path, self.dataset_folder)
                    y_test_data.append(ytest.to_numpy())
                    y_predictions.append(dl.make_prediciton())
                    time_measuring.append(time.time() - start)
                experiment_result.add_experiment_results(preprocessing_text, y_test_data, y_predictions)

        return np.mean(time_measuring)
            
                   

In [None]:
class CDataSet:
    '''
    Loads a dataset for the experiments and performs the experiments on it.

    Attributes
    ----------
    dataset_name: str
            The name of the chosen dataset.
    dataset: pandas.DataFrame
            Data read from the csv file, with rows containing missing values dropped.
    dataset_path: string
            Relative path to the csv file that contains the dataset.
    output_folder: string
            Relative path of the folder into which the info about the experiments is saved.
    log_path: string
            Path to the log file into which the progress of the experiments is saved.
    max_len: int
            Maximal number of words of an article used in an experiment. Default value is 250.
    language: string
            Language used in the articles. Can be only English (en) or Czech (cs).
    total_model_results: pandas.DataFrame
            One row per model with the metrics of its most accurate preprocessing.
    time_measured: pandas.DataFrame
            Average time spent on one experiment run per model.

    Methods
    -------
    export_final_results_to_latex()
        Exports the final results to latex tables and saves the graph of the results.
        Returns: None
    conduct_ML_experiments()
        Performs the experiments with the machine learning models.
        Returns: None
    conduct_DL_experiments()
        Performs the experiments with the deep learning models.
        Returns: None
    perform_experiments()
        Performs all experiments and saves the results into the output folder.
        Returns: None
    '''

    def __init__(self, dataset_name:str, dataset_path:str, output_folder:str, max_len:int=250, language:str='en') -> None:
        '''
        Validates the settings, loads the dataset and creates the log file.

        Parameters
        ----------
        dataset_name: str
                The name of the chosen dataset.
        dataset_path: string
                Relative path to the csv file that contains the dataset.
        output_folder: string
                Relative path of the folder into which the info about the experiments is saved.
        max_len: int
                Maximal number of words of an article used in an experiment. Default value is 250.
        language: string
                Language used in the articles. Can be only English (en) or Czech (cs).

        Raises
        ------
        ValueError
                If the specified language is not available or max_len is negative.

        Returns
        -------
        None
        '''
        # Validate before touching the file system so that wrong settings fail fast.
        if language not in ['en','cs']:
            raise ValueError('You have entered the wrong language!')
        if max_len < 0:
            raise ValueError('max_len must not be negative!')

        self.dataset_name = dataset_name
        self.dataset_path = dataset_path
        self.dataset = pd.read_csv(dataset_path, sep=',', on_bad_lines='skip').dropna()
        self.output_folder = output_folder
        self.total_model_results = pd.DataFrame(columns=['Model','Accuracy', 'Precision', 'Recall', 'F1-Score'])
        self.time_measured = pd.DataFrame(columns = [ '','Naive Bayes', 'Random Forest','CNN', 'LSTM'])
        self.language = language
        self.max_len = max_len
        self._create_log_file()

    def _create_log_file(self) -> None:
        '''
        Creates (or overwrites) the log text file of the dataset and stores its path in log_path.

        Returns
        -------
        None
        '''
        new_log_path = './' + self.output_folder + '/' + self.dataset_name + '_log.txt'
        with open(new_log_path, 'w') as f:
            f.write('Created log file for dataset:\n' + self.dataset_name + '\n')
        self.log_path = new_log_path

    def export_final_results_to_latex(self) -> None:
        '''
        Exports the final results and the measured times to latex tables and saves
        a bar chart of the results.

        The chart is drawn from a copy, so total_model_results itself is not modified
        and the method can be called repeatedly.

        Returns
        -------
        None
        '''
        # os.path.join works whether or not output_folder ends with a separator.
        tables_folder = os.path.join(self.output_folder, 'tables')

        latex_table = self.total_model_results.to_latex(index=False, escape=False)
        latex_table = latex_table.replace('{lllll}', '{|l|c|c|c|c|}')
        with open(os.path.join(tables_folder, 'per_model_results.txt'), 'w') as f:
            f.write(latex_table)

        self.time_measured.loc[0,''] = 'Avg. time per experiment (s)'
        latex_time = self.time_measured.to_latex(index=False, escape=False)
        latex_time = latex_time.replace('{lllll}', '{l|c|c|c|c|}')
        # The backslash must be escaped: '\t' and '\b' would be a TAB and a BACKSPACE
        # character and the rules would never be found.
        latex_time = latex_time.replace('\\toprule', '')
        latex_time = latex_time.replace('\\bottomrule', '')
        with open(os.path.join(tables_folder, 'avg_time_results.txt'), 'w') as f:
            f.write(latex_time)

        # The metrics are stored as formatted strings; plotting needs numbers.
        plot_data = self.total_model_results.set_index('Model').astype(float)

        plt.ioff()
        plt.cla()
        plt.close()
        plot_data.plot(kind='bar')
        plt.legend(loc='upper left', bbox_to_anchor=(1.01, 1))
        plt.xlabel('Model')
        plt.xticks(rotation=0)
        path = os.path.join(self.output_folder, 'figures', 'all_models_results.jpg')
        plt.savefig(path, bbox_inches='tight')
        plt.cla()
        plt.close()

    def _conduct_experiments(self, model_names:list, deep_learning:bool) -> None:
        '''
        Runs the experiments of every given model and stores the best result and the average time.

        Parameters
        ----------
        model_names: list
                Names of the models to experiment with.
        deep_learning: bool
                Whether CExperiment.experiment_DL is used instead of CExperiment.experiment.

        Returns
        -------
        None
        '''
        for m in model_names:
            print(m + '\n')
            # Every model gets its own shuffled copy of the dataset.
            new_experiment = CExperiment(self.dataset.copy().sample(frac=1), self.log_path, self.output_folder,
                                         self.dataset_name, language=self.language)
            experiment_result = CExperimentResult(self.dataset_name, m, self.output_folder, self.log_path)

            run = new_experiment.experiment_DL if deep_learning else new_experiment.experiment
            avg_time = run(m, experiment_result, self.max_len)
            self.time_measured.loc[0,m] = str(round(avg_time,2))

            # The exported row starts with the preprocessing name; replace it by the model name.
            results = experiment_result.export_results_to_latex(m)
            results[0] = m
            self.total_model_results.loc[len(self.total_model_results)] = results

    def conduct_ML_experiments(self) -> None:
        '''
        Performs the experiments with the machine learning models on the dataset.
        Writes info about the progress to the console and measures the time of the experiments.

        Returns
        -------
        None
        '''
        print('Performing ML experiments...')
        self._conduct_experiments(['Naive Bayes', 'Random Forest'], deep_learning=False)

    def conduct_DL_experiments(self) -> None:
        '''
        Performs the experiments with the deep learning models on the dataset.
        Writes info about the progress to the console and measures the time of the experiments.

        Returns
        -------
        None
        '''
        print('Performing DL experiments...')
        self._conduct_experiments(['CNN', 'LSTM'], deep_learning=True)

    def perform_experiments(self) -> None:
        '''
        Performs all experiments and saves the results into the output folder.

        Returns
        -------
        None
        '''
        self.conduct_ML_experiments()
        self.conduct_DL_experiments()
        self.export_final_results_to_latex()