In [None]:
import pandas as pd
import numpy as np
from tqdm import tqdm

In [None]:
# Load the per-row sentiment scores.
df = pd.read_csv("Analysis_News.csv")

# Parse "Date" and truncate every timestamp to midnight so that rows sharing a
# calendar day land in the same group when the daily average is taken below.
# This is a no-op when the column already holds date-only values.
df["Date"] = pd.to_datetime(df["Date"]).dt.normalize()

# Net sentiment per row: positive_score minus negative_score.
raw_score = df["positive_score"] - df["negative_score"]

# Min-max normalize the net sentiment. min/max are computed once instead of
# three times. If every row has the same score the range is 0 and the division
# yields NaN; those NaNs are replaced by the neutral 0.5 further down.
score_min = raw_score.min()
score_range = raw_score.max() - score_min
df["my_score"] = (raw_score - score_min) / score_range

# Average the normalized score per calendar day.
daily_average = df.groupby("Date")["my_score"].mean()

# Continuous daily calendar from the first to the last observed day, so that
# days without any rows are represented too.
date_range = pd.date_range(start=df["Date"].min(), end=df["Date"].max(), freq="D")
new_df = pd.DataFrame({"Date": date_range})

# Left-join the daily averages onto the calendar; days with no data get NaN.
new_df = new_df.merge(daily_average, on="Date", how="left")

# Fill missing days with 0.5. Assign the result back instead of calling
# fillna(..., inplace=True) on the selected column: the chained in-place form is
# deprecated and, under pandas Copy-on-Write, leaves new_df unchanged.
new_df["my_score"] = new_df["my_score"].fillna(0.5)

# Show new dataframe
print(new_df)




In [None]:
# Index by "Date" so the time-based rolling window below operates on calendar
# days. The guard avoids a KeyError when this cell is re-run after "Date" has
# already become the index.
if "Date" in new_df.columns:
    new_df = new_df.set_index("Date")

# Weekday number of every row: Monday is 0, Saturday is 5, Sunday is 6.
weekday = new_df.index.weekday
is_monday = weekday == 0
is_weekend = weekday.isin([5, 6])

# Trailing mean over a 3-calendar-day window ending on each row. For a Monday
# the window covers the preceding Saturday, Sunday and the Monday itself.
# Offset-based windows default to min_periods=1, so a Monday whose Saturday
# and/or Sunday fall before the start of the data is averaged over the days
# that actually exist, instead of always dividing the sum by 3.
three_day_mean = new_df["my_score"].rolling("3D").mean()

# Replace Monday's value with the Saturday/Sunday/Monday average.
new_df.loc[is_monday, "my_score"] = three_day_mean[is_monday]

# Delete the weekend rows. .copy() gives an independent frame so later edits
# to new_df do not trigger SettingWithCopyWarning.
new_df = new_df.loc[~is_weekend].copy()



In [None]:
# Print the frame left after the weekend rows were removed
print(new_df)

# Write the frame to disk as CSV; to_csv includes the index by default
new_df.to_csv(path_or_buf="cooked_text_score.csv")