In [1]:
import pandas as pd
import ast
import os

import re  # token-boundary matching in entity_mentions_org


def entity_mentions_org(entity, org):
    """Return True when ``entity`` refers to the ticker ``org``.

    For string entities the ticker must appear as a standalone token, i.e. not
    directly preceded or followed by a letter or digit. A plain substring test
    would let a short ticker such as ``MS`` match ``MSFT``.

    Non-string entities (tuples, lists, dicts, ...) keep plain ``in``
    membership semantics.
    NOTE(review): the element type stored in the 'entities' column is not
    visible in this notebook -- confirm this matching rule against the data.
    """
    if isinstance(entity, str):
        token = rf"(?<![A-Za-z0-9]){re.escape(org)}(?![A-Za-z0-9])"
        return re.search(token, entity) is not None
    return org in entity


def daily_average_sentiment(df, org):
    """Average ``normalized_sentiment`` per calendar day for one organization.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns 'entities' (an iterable of entities per row),
        'ts_standardized' (anything ``pd.to_datetime`` accepts) and
        'normalized_sentiment'. The frame is not modified.
    org : str
        Ticker to select rows for.

    Returns
    -------
    pandas.DataFrame
        Columns 'ts' (datetime, one row per day that has data),
        'Average_sentiment' and 'ORG'. Empty when no row mentions ``org``.
    """
    # Build an explicitly boolean mask so that filtering also behaves when the
    # input frame has no rows.
    mentions = pd.Series(
        [any(entity_mentions_org(entity, org) for entity in entities)
         for entities in df['entities']],
        index=df.index,
        dtype=bool,
    )

    # Work on a copy to avoid SettingWithCopyWarning when parsing timestamps.
    org_rows = df[mentions].copy()
    org_rows['ts_standardized'] = pd.to_datetime(org_rows['ts_standardized'])

    daily = (
        org_rows
        .groupby(org_rows['ts_standardized'].dt.date)['normalized_sentiment']
        .mean()
        .reset_index()
        .rename(columns={'ts_standardized': 'ts',
                         'normalized_sentiment': 'Average_sentiment'})
    )
    daily['ORG'] = org

    # Grouping by .dt.date yields datetime.date objects; convert back to
    # datetime for consistency.
    daily['ts'] = pd.to_datetime(daily['ts'])
    return daily


# Notebook kernels execute cells with __name__ == "__main__", so this section
# runs as usual in Jupyter; the guard only keeps the helpers above importable
# without touching the disk.
if __name__ == "__main__":
    # Load the dataset
    file_path = r"D:\FINAL_DATA_FILES\final_normalized_sentiment.csv"
    df = pd.read_csv(file_path)

    # Convert the 'entities' column from its string form to Python objects
    df['entities'] = df['entities'].fillna("[]").apply(ast.literal_eval)

    # List of organizations to process
    organizations = ['MSFT']

    # Specify the directory to save the files
    save_directory = r"D:\FINAL_DATA_FILES"

    for org in organizations:
        average_sentiment_per_day = daily_average_sentiment(df, org)

        # Save the final dataframe for each organization
        final_file_path = os.path.join(save_directory, f'final_complete_{org}_sentiment.csv')
        average_sentiment_per_day.to_csv(final_file_path, index=False)

    print("Sentiment files have been processed and saved.")

Sentiment files have been processed and saved.


# CODE FOR PROCESSING AND SAVING MULTIPLE ORGS AT ONCE — JUST CHANGE THE ORG SYMBOLS IN THE `organizations` LIST

In [15]:
import pandas as pd
import ast
import os

import re  # token-boundary matching in entity_mentions_org


def entity_mentions_org(entity, org):
    """Return True when ``entity`` refers to the ticker ``org``.

    For string entities the ticker must appear as a standalone token, i.e. not
    directly preceded or followed by a letter or digit. A plain substring test
    would let ``MS`` match ``MSFT`` and mix Microsoft rows into the MS file.

    Non-string entities (tuples, lists, dicts, ...) keep plain ``in``
    membership semantics.
    NOTE(review): the element type stored in the 'entities' column is not
    visible in this notebook -- confirm this matching rule against the data.
    """
    if isinstance(entity, str):
        token = rf"(?<![A-Za-z0-9]){re.escape(org)}(?![A-Za-z0-9])"
        return re.search(token, entity) is not None
    return org in entity


def daily_average_sentiment(df, org):
    """Average ``normalized_sentiment`` per calendar day for one organization.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns 'entities' (an iterable of entities per row),
        'ts_standardized' (anything ``pd.to_datetime`` accepts) and
        'normalized_sentiment'. The frame is not modified.
    org : str
        Ticker to select rows for.

    Returns
    -------
    pandas.DataFrame
        Columns 'ts' (datetime, one row per day that has data),
        'Average_sentiment' and 'ORG'. Empty when no row mentions ``org``.
    """
    # Build an explicitly boolean mask so that filtering also behaves when the
    # input frame has no rows.
    mentions = pd.Series(
        [any(entity_mentions_org(entity, org) for entity in entities)
         for entities in df['entities']],
        index=df.index,
        dtype=bool,
    )

    # Work on a copy to avoid SettingWithCopyWarning when parsing timestamps.
    org_rows = df[mentions].copy()
    org_rows['ts_standardized'] = pd.to_datetime(org_rows['ts_standardized'])

    daily = (
        org_rows
        .groupby(org_rows['ts_standardized'].dt.date)['normalized_sentiment']
        .mean()
        .reset_index()
        .rename(columns={'ts_standardized': 'ts',
                         'normalized_sentiment': 'Average_sentiment'})
    )
    daily['ORG'] = org

    # Grouping by .dt.date yields datetime.date objects; convert to datetime so
    # the values line up with pd.date_range when gaps are filled.
    daily['ts'] = pd.to_datetime(daily['ts'])
    return daily


def fill_missing_days(daily, org, neutral_sentiment=0.5):
    """Expand ``daily`` to one row per calendar day between its first and last date.

    Parameters
    ----------
    daily : pandas.DataFrame
        Output of ``daily_average_sentiment``: columns 'ts', 'Average_sentiment'
        and 'ORG', with at most one row per day.
    org : str
        Ticker written to the 'ORG' column of every row.
    neutral_sentiment : float, default 0.5
        Value used for days without data (and for days whose average is NaN).
        0.5 is the value this notebook treats as neutral sentiment.

    Returns
    -------
    pandas.DataFrame
        Columns 'ts', 'ORG', 'Average_sentiment' in that order. Empty when
        ``daily`` is empty, since there is no date range to build.
    """
    daily = daily.assign(ts=pd.to_datetime(daily['ts']))
    if daily.empty:
        # min()/max() of an empty column are NaT and pd.date_range would raise.
        return daily[['ts', 'ORG', 'Average_sentiment']]

    # Generate a complete date range from the first to the last date
    complete_date_range = pd.date_range(start=daily['ts'].min(), end=daily['ts'].max())

    filled = (
        daily.set_index('ts')['Average_sentiment']
        .reindex(complete_date_range)   # missing days appear as NaN
        .fillna(neutral_sentiment)
        .rename_axis('ts')
        .reset_index()
    )
    filled.insert(1, 'ORG', org)
    return filled


# Notebook kernels execute cells with __name__ == "__main__", so this section
# runs as usual in Jupyter; the guard only keeps the helpers above importable
# without touching the disk.
if __name__ == "__main__":
    # Load the dataset
    file_path = r"D:\FINAL_DATA_FILES\final_normalized_sentiment.csv"
    df = pd.read_csv(file_path)

    # Convert the 'entities' column from its string form to Python objects
    df['entities'] = df['entities'].fillna("[]").apply(ast.literal_eval)

    # List of organizations to process
    organizations = ['AAPL', 'AMZN', 'MS']

    # Specify the directory to save the files
    save_directory = r"D:\FINAL_DATA_FILES"

    for org in organizations:
        average_sentiment_per_day = daily_average_sentiment(df, org)
        if average_sentiment_per_day.empty:
            # Keep going with the remaining organizations instead of failing.
            print(f"No rows mention {org}; writing a header-only file.")

        complete_sentiment_df = fill_missing_days(average_sentiment_per_day, org)

        # Save the final dataframe for each organization
        final_file_path = os.path.join(save_directory, f'final_complete_{org}_sentiment.csv')
        complete_sentiment_df.to_csv(final_file_path, index=False)