In [None]:
import pandas as pd
import os

# Yearly source files, in the order their rows are stacked
csv_files = ['2019.csv', '2020.csv', '2021.csv', '2022.csv', '2023.csv']

# Read each yearly file into its own DataFrame
dataframes = list(map(pd.read_csv, csv_files))

# Stack all years into one frame; ignore_index=True discards the per-file
# row labels and renumbers the combined rows from zero
merged_df = pd.concat(dataframes, ignore_index=True)

# Persist the combined data without writing the row index as a column
merged_df.to_csv('all_years_data.csv', index=False)

print("All CSV files have been successfully merged into 'all_years_data.csv'")


In [None]:
import pandas as pd

# Read back the combined dataset written by the merge step
merged_df = pd.read_csv('all_years_data.csv')

# Remove the five leading columns: they are picked by position but dropped
# by label, and the name is rebound instead of mutating the frame in place
merged_df = merged_df.drop(columns=merged_df.columns[:5])

# Write the trimmed data to its own file, without the row index
merged_df.to_csv('updated_file.csv', index=False)

print("The first five columns have been successfully deleted and saved to 'updated_file.csv'")


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
import numpy as np

# Read the trimmed dataset and force YEAR and RESULT to numeric dtypes;
# errors='coerce' turns any value that cannot be parsed into NaN
df = pd.read_csv('updated_file.csv').assign(
    YEAR=lambda frame: pd.to_numeric(frame['YEAR'], errors='coerce'),
    RESULT=lambda frame: pd.to_numeric(frame['RESULT'], errors='coerce'),
)

# Function to plot data and predictions for the selected groups and years
def plot_groups(groups, years, future_years=(2024, 2025)):
    """Plot the yearly mean RESULT for the selected groups with a linear forecast.

    Rows of the module-level ``df`` are filtered to the requested respondent
    groups and years, RESULT is averaged per year, and a linear regression of
    mean RESULT on YEAR is fitted and evaluated at ``future_years``. The
    historical means and the forecast are drawn on a single figure.

    Parameters
    ----------
    groups : list of str
        Values of the 'RESPONDENT GROUP' column to include.
    years : list of int
        Values of the 'YEAR' column to include.
    future_years : sequence of int, optional
        Years to forecast. Defaults to (2024, 2025). An empty sequence
        plots the historical data only.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``. If the selection contains no
        usable data, a message is printed and nothing is plotted.
    """
    forecast_years = [int(year) for year in future_years]

    # Keep only rows matching both the requested groups and the requested years
    selection = df[df['RESPONDENT GROUP'].isin(groups) & df['YEAR'].isin(years)]
    if selection.empty:
        print("No data available for the selected group(s) and/or years.")
        return

    # Mean RESULT per year. A year whose RESULT values are all NaN yields a NaN
    # mean, which LinearRegression.fit rejects, so such years are dropped.
    yearly_mean = (
        selection.groupby('YEAR')['RESULT']
        .mean()
        .reset_index()
        .dropna(subset=['RESULT'])
    )
    if yearly_mean.empty:
        print("No numeric RESULT values available for the selected group(s) and/or years.")
        return
    if len(yearly_mean) < 2:
        print("Only one year of data is available; the forecast cannot reflect a trend.")

    # Fit mean RESULT as a linear function of YEAR
    model = LinearRegression()
    model.fit(yearly_mean[['YEAR']], yearly_mean['RESULT'])

    fig, ax = plt.subplots(figsize=(10, 6))

    # Historical data
    ax.plot(yearly_mean['YEAR'], yearly_mean['RESULT'], marker='o',
            label=f'Historical Data (Average of {len(groups)} groups)')

    if forecast_years:
        # Predict from a DataFrame carrying the same 'YEAR' column name used in
        # fit(); passing a bare array makes scikit-learn warn about missing
        # feature names.
        future_X = pd.DataFrame({'YEAR': forecast_years})
        future_predictions = model.predict(future_X)

        first_forecast, last_forecast = min(forecast_years), max(forecast_years)
        if first_forecast == last_forecast:
            forecast_span = f'{first_forecast}'
        else:
            forecast_span = f'{first_forecast}-{last_forecast}'

        # Future predictions
        ax.plot(forecast_years, future_predictions, marker='x', linestyle='--',
                color='red', label=f'Predicted Data ({forecast_span})')

        # Annotate each predicted point with its value
        for year, prediction in zip(forecast_years, future_predictions):
            ax.annotate(f'{prediction:.2f}', (year, prediction),
                        textcoords="offset points", xytext=(0, 10), ha='center')

    # Customize the plot
    ax.set_title('Results and Predictions for Selected Groups')
    ax.set_xlabel('Year')
    ax.set_ylabel('Mean Result')
    ax.grid(True)

    # One tick per year, from the first plotted year through the last
    # historical or forecast year
    first_tick = int(yearly_mean['YEAR'].min())
    last_tick = max([int(yearly_mean['YEAR'].max())] + forecast_years)
    ax.set_xticks(np.arange(first_tick, last_tick + 1, 1))
    ax.legend()
    plt.show()

# Interactive driver: repeatedly ask for respondent groups and years, validate
# both against the dataset, and plot the selection until the user declines.

# The valid choices do not change between iterations, so look them up once.
valid_groups = set(df['RESPONDENT GROUP'].dropna().unique())
valid_years = set(df['YEAR'].dropna().unique())

while True:
    try:
        print("Enter the respondent groups you want to analyze:")
        groups_input = input("Enter respondent groups separated by commas (e.g., 'Group1,Group2,Group3'): ").strip()
        groups = [g.strip() for g in groups_input.split(',')]

        # Validate groups
        if not all(group in valid_groups for group in groups):
            print("Some of the groups do not exist in the dataset. Please check and try again.")
            continue

        print("Enter the years you want to analyze:")
        years_input = input("Enter years separated by commas (e.g., '2020,2021,2022'): ").strip()
        try:
            years = [int(y.strip()) for y in years_input.split(',')]
        except ValueError:
            # Non-integer input: re-prompt, like the other validation failures
            print("Years must be whole numbers separated by commas. Please try again.")
            continue

        # Validate years
        if not all(year in valid_years for year in years):
            print("Some of the years do not exist in the dataset. Please check and try again.")
            continue

        # Call the function to plot the graph for the entered groups and years
        plot_groups(groups, years)

    except Exception as e:
        # Report the failure but keep the session alive so the user can retry
        print(f"An error occurred: {e}")

    # Ask if the user wants to continue
    cont = input("Do you want to analyze other groups and years? (yes/no): ").strip().lower()
    if cont != 'yes':
        break


Enter the respondent groups you want to analyze:
