In [5]:
import pandas as pd
import numpy as np

In [6]:
def read_and_process_csv(file_paths, journal_names):
    """Read each journal report CSV into a DataFrame tagged with its journal name.

    Every file is parsed with its first 6 lines and last 2 lines skipped, and
    the fixed column names below are applied instead of any header in the file.
    The tenth, unnamed column is discarded (presumably it only absorbs a
    trailing delimiter on each row -- TODO confirm against the export format).

    Args:
        file_paths: Iterable of CSV paths to read.
        journal_names: Iterable of journal names, paired positionally with
            ``file_paths``.

    Returns:
        A list of DataFrames, one per file that was read successfully, in
        input order. A file that raises while being read is reported with
        ``print`` and skipped, so the list may be shorter than the input.

    Raises:
        ValueError: If ``file_paths`` and ``journal_names`` differ in length.
    """
    # Materialise both inputs so their lengths can be compared; zip() alone
    # would silently drop the unmatched tail of the longer one.
    file_paths = list(file_paths)
    journal_names = list(journal_names)
    if len(file_paths) != len(journal_names):
        raise ValueError(
            f"file_paths has {len(file_paths)} entries but journal_names has "
            f"{len(journal_names)}; they must pair up one-to-one"
        )

    processed_data = []
    columns = ["Year", "Total Citations", "Journal impact factor", "JIF without self cites", 
               "5 Year Impact Factor", "Immediacy Index", "Citable items", 
               "Percent of articles in Citable items", "Average JIF Percentile", ""]

    for path, journal_name in zip(file_paths, journal_names):
        try:
            # skipfooter is only supported by the python parsing engine
            df = pd.read_csv(path, skiprows=6, skipfooter=2, engine='python', names=columns)
            df = df.drop(columns=df.columns[-1])  # Drop the unnamed last column
            df['Journal Name'] = journal_name  # Add the journal name column
            df = df.reset_index(drop=True)
            processed_data.append(df)
        except Exception as e:
            # Deliberate best-effort: report the failing file and keep going
            print(f"Error reading {path}: {e}")

    return processed_data

In [7]:
# Folder holding the per-journal impact exports.
# NOTE: machine-specific absolute path; edit this one line to run elsewhere.
IMPACT_DIR = r"C:\Users\hrsto\OneDrive - Deakin University\Uni\Data Science\Y1\T3\SIT723 Research Training and Project\Data\WoS\Impact"

# (export file name, journal name) pairs. Keeping each file next to its journal
# name prevents the two lists derived below from drifting out of step.
JOURNAL_FILES = [
    ("HEALTH-PSYCHOL-RES#2022#ESCI#All-Years.csv", "Health Psychology Research"),
    ("HEALTH-PSYCHOL-REV#2022#SSCI#All-Years.csv", "Health Psychology Review"),
    ("IMPLEMENT-SCI#2022#SSCI#SCIE#All-Years-2.csv", "Implementation Science"),
    ("PILOT-FEASIBILITY-ST#2022#ESCI#All-Years.csv", "Pilot and Feasibility Studies"),
    ("PSYCHOL-HEALTH#2022#SSCI#All-Years.csv", "Psychology and Health"),
    ("PSYCHOL-HEALTH-MED#2022#SCIE#SSCI#All-Years.csv", "Psychology, Health & Medicine"),
    ("HEALTH-PSYCHOL#2022#SSCI#SCIE#All-Years.csv", "Health Psychology"),
    ("HEALTH-PSYCHOL-BEHAV#2022#ESCI#All-Years.csv", "Health Psychology and Behavioral Medicine"),
    ("HEALTH-PSYCHOL-OPEN#2022#ESCI#All-Years.csv", "Health Psychology Open"),
    ("HEALTH-PSYCHOL-REP#2022#ESCI#All-Years.csv", "Health Psychology Reports"),
]

# Paths to the CSV files
file_paths = [f"{IMPACT_DIR}\\{file_name}" for file_name, journal_name in JOURNAL_FILES]

# Corresponding journal names
journal_names = [journal_name for file_name, journal_name in JOURNAL_FILES]

In [8]:
# Read and process the CSV files; files that fail to load are reported and skipped
dataframes = read_and_process_csv(file_paths, journal_names)

# pd.concat raises an opaque "No objects to concatenate" on an empty list,
# so stop here with an actionable message if nothing could be loaded.
if not dataframes:
    raise ValueError("No journal CSV files could be read; check file_paths.")

# Stack the per-journal frames; ignore_index=True gives the result a fresh
# 0..n-1 index, so no separate in-place reset_index is needed.
merged_df = pd.concat(dataframes, ignore_index=True)

# Specify the path where you want to save the CSV file
# (to_csv does not create missing folders, so the target folder must already exist)
output_file_path = r"C:\Users\hrsto\OneDrive - Deakin University\Uni\Data Science\Y1\T3\SIT723 Research Training and Project\Data\WoS\Impact\MErged\Merged Journal Impact.csv"

# Save the merged dataframe to a CSV file, leaving out the row index
merged_df.to_csv(output_file_path, index=False)

# Display merged dataframe
merged_df

Unnamed: 0,Year,Total Citations,Journal impact factor,JIF without self cites,5 Year Impact Factor,Immediacy Index,Citable items,Percent of articles in Citable items,Average JIF Percentile,Journal Name
0,2022,393,2.000,2.000,2.300,0.300,75,78.67,,Health Psychology Research
1,2021,348,,,,,27,88.89,,Health Psychology Research
2,2020,260,,,,,30,73.33,,Health Psychology Research
3,2022,3630,9.800,9.800,9.800,1.700,32,43.75,97.300,Health Psychology Review
4,2021,3753,9.638,9.217,13.543,2.478,23,39.13,95.802,Health Psychology Review
...,...,...,...,...,...,...,...,...,...,...
99,2021,728,,,,,34,82.35,,Health Psychology Open
100,2020,542,,,,,48,89.58,,Health Psychology Open
101,2022,342,2.000,1.700,1.600,0.900,16,100.00,,Health Psychology Reports
102,2021,333,,,,,32,96.88,,Health Psychology Reports
