In [1]:
#Session 1: Convert RIS to CSV

In [None]:
import pandas as pd

def ris_to_dataframe(file_path):
    """Parse an RIS bibliography file into a DataFrame with one row per record.

    A line is treated as a field only when it has the RIS tag layout: a
    two-character tag, two spaces, a hyphen, then the value
    (e.g. ``TI  - Some title``). Lines are handled as follows:

    * ``TY`` is stored under the column ``Type``.
    * ``AU`` values are collected and joined with ``'; '`` in the column
      ``Authors``.
    * ``ER`` closes the current record.
    * Any other tag is stored under the tag itself; a tag that repeats within
      one record is kept as a list of values.
    * Blank lines are ignored.
    * Non-blank lines without a tag are appended (space-separated) to the
      most recent field of the current record, so wrapped abstracts are kept
      intact even when the wrapped text starts with letters such as "ER".

    Parameters
    ----------
    file_path : str or path-like
        Location of the RIS file. It is decoded as UTF-8; a leading
        byte-order mark is tolerated.

    Returns
    -------
    pandas.DataFrame
        One row per record. The frame has no ``Authors`` column when no
        record contains an ``AU`` tag, and is empty when the file holds no
        records.
    """
    import re  # function-scope import so the definition is self-contained

    # Tag, exactly two spaces, a hyphen, then the (possibly empty) value.
    tag_line = re.compile(r'([A-Z][A-Z0-9])  -(.*)')

    records = []
    record = {}
    last_key = None  # field that an untagged continuation line belongs to

    # 'utf-8-sig' decodes plain UTF-8 and also strips a BOM if one is present;
    # with plain 'utf-8' the BOM would hide the first tag of the file.
    with open(file_path, 'r', encoding='utf-8-sig') as file:
        for raw_line in file:
            line = raw_line.rstrip('\r\n')
            if not line.strip():
                # Blank separator lines carry no data.
                continue

            match = tag_line.match(line)
            if match is None:
                # Continuation of the previous field's value.
                if last_key is not None:
                    text = line.strip()
                    current = record[last_key]
                    if isinstance(current, list):
                        current[-1] = (current[-1] + ' ' + text).strip()
                    else:
                        record[last_key] = (current + ' ' + text).strip()
                continue

            tag = match.group(1)
            value = match.group(2).strip()

            if tag == 'ER':
                records.append(record)
                record = {}
                last_key = None
            elif tag == 'TY':
                record['Type'] = value
                last_key = 'Type'
            elif tag == 'AU':
                record.setdefault('Authors', []).append(value)
                last_key = 'Authors'
            else:
                if tag in record:
                    if isinstance(record[tag], list):
                        record[tag].append(value)
                    else:
                        # Second occurrence: promote the scalar to a list.
                        record[tag] = [record[tag], value]
                else:
                    record[tag] = value
                last_key = tag

    # Keep a trailing record whose ER line is missing instead of dropping it.
    if record:
        records.append(record)

    df = pd.DataFrame(records)
    # The column is absent when no record has an AU tag (or the file is empty).
    if 'Authors' in df.columns:
        df['Authors'] = df['Authors'].apply(
            lambda x: '; '.join(x) if isinstance(x, list) else x
        )

    return df

def convert_ris_to_csv(ris_file_path, csv_file_path):
    """Convert an RIS file to a CSV file.

    The RIS file is parsed with ``ris_to_dataframe`` and the resulting frame
    is written to ``csv_file_path`` without the index column. Cells that hold
    a list (a tag that occurs several times in one record) are joined with
    ``'; '`` so the CSV contains plain text instead of a Python list
    representation such as ``"['a', 'b']"``.

    Parameters
    ----------
    ris_file_path : str or path-like
        RIS file to read.
    csv_file_path : str or path-like
        Destination of the CSV file; an existing file is overwritten.
    """
    df = ris_to_dataframe(ris_file_path)

    # Flatten list-valued cells column by column; other cells (including
    # missing values) pass through unchanged.
    for column in df.columns:
        df[column] = df[column].map(
            lambda cell: '; '.join(cell) if isinstance(cell, list) else cell
        )

    df.to_csv(csv_file_path, index=False)

# Input RIS file and the CSV file that the conversion writes
ris_file_path = 'file.ris'
csv_file_path = 'file.csv'

# Run the RIS -> CSV conversion
convert_ris_to_csv(
    ris_file_path=ris_file_path,
    csv_file_path=csv_file_path,
)

In [None]:
#Session 2: Merge CSV Files

In [2]:
import pandas as pd

def merge_csv_files(file_path1, file_path2, output_path):
    """Stack the rows of two CSV files and write them to a single CSV file.

    Rows of ``file_path1`` come first, followed by the rows of
    ``file_path2``. The row index is renumbered and is not written to
    ``output_path``.
    """
    frames = [pd.read_csv(source) for source in (file_path1, file_path2)]
    combined = pd.concat(frames, ignore_index=True)
    combined.to_csv(output_path, index=False)

# The two CSV files to combine and the file that receives the combined rows
csv_file_path1 = 'file_wos.csv'
csv_file_path2 = 'file_scopus.csv'
merged_csv_file_path = 'merge.csv'

# Run the merge
merge_csv_files(
    file_path1=csv_file_path1,
    file_path2=csv_file_path2,
    output_path=merged_csv_file_path,
)

In [None]:
#Session 3: Classify Relevant and Irrelevant Articles

In [None]:
import pandas as pd

# Read the merged article list
file_path = 'merge.csv'  # Adjust the path if the file lives elsewhere
data = pd.read_csv(file_path)

# Every article starts out unlabeled
data['label'] = None

# Hand-label a seed set: ten relevant articles (1) and ten irrelevant ones (0).
# The row indices below are placeholders; replace them with the rows you
# actually judged.
relevant_indices = list(range(0, 10))  # Replace with the relevant rows
irrelevant_indices = list(range(10, 20))  # Replace with the irrelevant rows

data.loc[irrelevant_indices, 'label'] = 0
data.loc[relevant_indices, 'label'] = 1

# Write the labeled table to a new CSV file
data.to_csv('articles.csv', index=False)