In [1]:
import pandas as pd
from datetime import datetime
import json

In [2]:
# Maps str(client_id) -> per-client data; every client starts with an empty dict.
clients = {}

# Load the cross-client export and collect the distinct client ids it contains.
df = pd.read_csv("metricscontribution_rate_initiative_cross_client_data_1613_for_students.csv")
client_ids = df["client_id"].unique()

for client_id in client_ids:
    # Keys are stored as strings so the mapping can later be serialised to JSON.
    client_id = str(client_id)
    clients[client_id] = {}

# print(clients)

In [3]:
def _summarise_period(period, period_rows):
    """Build the summary dictionary for one client and one period.

    Args:
        period: Period key (the first 10 characters of ``period_end``).
        period_rows: The client's rows that fall in ``period``. Must provide
            the ``source``, ``source_type``, ``attribution`` and ``customers``
            columns.

    Returns:
        A dict with the keys ``period``, ``ATTRIBUTED``, ``UNATTRIBUTED``,
        ``CR`` (attributed customers as a percentage of all customers) and
        ``records`` (one entry per distinct source in the period).
    """
    attributed_customers = period_rows.loc[
        period_rows["attribution"] == "ATTRIBUTED", "customers"].sum()
    unattributed_customers = period_rows.loc[
        period_rows["attribution"] != "ATTRIBUTED", "customers"].sum()
    total_customers = attributed_customers + unattributed_customers

    # A period with no customers would evaluate 0/0, which yields NaN and is
    # then written by json.dumps as the non-standard token ``NaN``.
    # Report a rate of 0.0 for such periods instead.
    if total_customers:
        contribution_rate = float(attributed_customers / total_customers * 100)
    else:
        contribution_rate = 0.0

    records = []
    for source in period_rows["source"].unique():
        source_rows = period_rows[period_rows["source"] == source]
        records.append({
            "source": str(source),
            # Taken from the first row of the source.
            # NOTE(review): presumably source_type is constant per source - confirm.
            "source_type": str(source_rows.iloc[0]["source_type"]),
            # Kept as the string "True"/"False" (not a boolean): "True" when
            # at least one row of this source is ATTRIBUTED.
            "ATTRIBUTED": str(source_rows["attribution"].eq("ATTRIBUTED").any()),
            # Sum over all rows of the source, whatever their attribution.
            "customers": int(source_rows["customers"].sum()),
        })

    return {
        "period": period,
        "ATTRIBUTED": int(attributed_customers),
        "UNATTRIBUTED": int(unattributed_customers),
        "CR": contribution_rate,
        "records": records,
    }


for client_id in client_ids:
    client_specific_df = df.loc[
        df["client_id"] == client_id,
        ["period_end", "source", "source_type", "attribution", "customers"]]

    # The period key is the first 10 characters of period_end; compute it once
    # per client instead of once per period.
    period_keys = client_specific_df["period_end"].str[:10]

    period_dict = {}
    for period in sorted(period_keys.unique()):
        period_df = client_specific_df[period_keys == period]
        period_dict[period] = _summarise_period(period, period_df)

    # Replace the client's entry with its per-period summaries.
    clients[str(client_id)] = period_dict


In [4]:
# Spot-check a single client/period entry before exporting.
sample_entry = clients["2074305340"]["2023-02-12"]
print(sample_entry)

# Serialise before opening the file: if json.dumps raises, clients.json is
# left untouched.
file_path = "clients.json"
json_string = json.dumps(clients)
with open(file_path, mode="w") as out_file:
    out_file.write(json_string)

{'period': '2023-02-12', 'ATTRIBUTED': 1, 'UNATTRIBUTED': 38, 'CR': 2.564102564102564, 'records': [{'source': 'direct', 'source_type': 'link', 'ATTRIBUTED': 'False', 'customers': 38}, {'source': 'friend_landing_experience', 'source_type': 'link', 'ATTRIBUTED': 'True', 'customers': 1}]}
