In [33]:
import pandas as pd
from tabulate import tabulate
import plotly.express as px

# Load the CSV file
data = pd.read_json("/Users/jackrobertson/100-day-analysis-site/100-day-analysis/data/DeepDive100Days/filtered_cleaned_data.json") 

# Preview the first few rows
data.head()

Unnamed: 0,RegulatedEntityName,RegulatedEntityType,Value,AcceptedDate,ReceivedDate,ReportedDate,IsReportedPrePoll,ReportingPeriodId,ReportingPeriodName
0,Conservative and Unionist Party,Political Party,5000.0,2024-06-29,2024-06-23,2024-07-29,,3875,Q2 2024
1,Liberal Democrats,Political Party,89021.49,2024-06-29,2024-06-29,2024-07-29,,3875,Q2 2024
2,Conservative and Unionist Party,Political Party,4800.0,2024-06-29,2024-06-22,2024-07-29,,3875,Q2 2024
3,Conservative and Unionist Party,Political Party,3500.0,2024-06-29,2024-06-03,2024-07-29,,3875,Q2 2024
4,Conservative and Unionist Party,Political Party,5000.0,2024-06-29,2024-06-16,2024-07-29,,3875,Q2 2024


## Total Donations for the 100 days per party 

In [34]:
# Convert the values in the 'Value' column to numeric, handling non-numeric values
data['Value'] = pd.to_numeric(data['Value'].replace('[\£,]', '', regex=True), errors='coerce')

# List of parties to keep
parties_to_keep = [
    'Conservative and Unionist Party',
    'Green Party',
    'Labour Party',
    'Liberal Democrats',
    'Reform UK'
]

# Filter the DataFrame to keep only the specified parties
filtered_data = data[data['RegulatedEntityName'].isin(parties_to_keep)]

# Group by party and sum the values
aggregated_data = filtered_data.groupby('RegulatedEntityName')['Value'].sum().reset_index()

# Rename the columns for clarity
aggregated_data.columns = ['Party', 'Total Value']

# Display the filtered results in a readable format
print(tabulate(aggregated_data, headers='keys', tablefmt='pretty', floatfmt=".2f"))

+---+---------------------------------+-------------+
|   |              Party              | Total Value |
+---+---------------------------------+-------------+
| 0 | Conservative and Unionist Party | 3770423.17  |
| 1 |           Green Party           |  137419.47  |
| 2 |          Labour Party           | 10884881.07 |
| 3 |        Liberal Democrats        | 2376081.48  |
| 4 |            Reform UK            |  1633360.0  |
+---+---------------------------------+-------------+


## Cumulative Donations Accepted Past 4 Jun Over Time by Party

In [41]:
import pandas as pd
import plotly.express as px

# Assuming 'data' is your loaded DataFrame
# Convert 'Value' to numeric if not already done, and 'AcceptedDate' to datetime format
data['Value'] = pd.to_numeric(data['Value'], errors='coerce')
data['AcceptedDate'] = pd.to_datetime(data['AcceptedDate'], errors='coerce')

# Filter for specific parties
parties_to_keep = [
    'Conservative and Unionist Party', 'Green Party', 'Labour Party', 
    'Liberal Democrats', 'Reform UK'
]
filtered_data = data[data['RegulatedEntityName'].isin(parties_to_keep)]

# Group by 'AcceptedDate' and 'RegulatedEntityName' to calculate cumulative donations
filtered_data['Cumulative Sum'] = filtered_data.groupby('RegulatedEntityName')['Value'].cumsum()

# Rename columns for clarity
filtered_data = filtered_data.rename(columns={
    'AcceptedDate': 'Date', 
    'RegulatedEntityName': 'Party', 
    'Value': 'Donation'
})

# Plot the data with reversed X-axis
fig = px.line(
    filtered_data, 
    x='Date', 
    y='Cumulative Sum', 
    color='Party', 
    title='Cumulative Donations Accepted Past 4 Jun Over Time by Party',
    labels={'Cumulative Sum': 'Cumulative Donation (£)'}
)

# Reverse the X-axis
fig.update_layout(xaxis=dict(autorange="reversed"))

fig.show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

