<a href="https://colab.research.google.com/github/Natdata/Demography_from_CT_file_saving/blob/main/Demografia_plik_do_admina.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import requests
import json
import pandas as pd
import time

# HTTP headers sent with every CleverTap API request.
# NOTE(review): 'aaaa' / 'bbbb' are placeholders for the account id and
# passcode. Fill them in before running, and avoid committing real
# credentials to the repository.
headers = {
    'X-CleverTap-Account-Id': 'aaaa',
    'X-CleverTap-Passcode': 'bbbb',
    'Content-Type': 'application/json',
}

# Reporting window passed to the API as the "from" / "to" values of each
# query. `start` is additionally written to the "date" column of every
# output row.
start, end = 20240801, 20240831

# Mapping of brand names to brand IDs.
#
# This dict is the single source of truth for the brands to query: the
# `brands` list below is derived from it, so a name can no longer exist in
# one place and be missing (or spelled differently) in the other. The key
# 'Dr.Max Drogeria' uses the spelling that is sent to the API as the
# `brand_name` filter value.
#
# NOTE(review): 'Sklep Polski' and 'MRÓWKA/MARKA' both map to brand_id 102,
# which would produce two output rows with the same brand_id. Confirm
# whether one of these IDs is a typo.
brand_id_mapping = {
    'Lidl': 3, 'Carrefour': 39, 'Carrefour market': 46, 'Carrefour Express': 47, 'Globi': 53, 'Intermarche': 41,
    'POLOmarket': 9, 'Duży Ben': 57, 'Społem - Blisko i Korzystnie': 51, 'bi1': 42, 'Twój market': 40, 'Aldi': 4,
    'Dealz': 63, 'Hipper.pl': 71, 'Gram Market': 73, 'Lewiatan': 26, 'Netto': 62, 'Emma Market': 81,
    'Stokrotka': 75, 'Jysk': 50, 'Delikatesy Centrum': 31, 'Torimpex': 86, 'Prymus AGD': 85, 'Market Point': 88,
    'Hitpol': 84, 'Sklep Polski': 102, 'MRÓWKA/MARKA': 102, 'Leroy Merlin': 107, 'Chata Polska': 120, 'Oriflame': 119,
    'Arhelan': 112, 'Pepco': 33, 'Drogerie DM': 121, 'Empik': 114, 'TOPAZ': 55, 'Mrówka Lubliniec': 122, 'Supeco': 54,
    'Blu Salony Łazienek': 67, 'Kaufland': 28, 'Drogerie Natura': 76, 'Żabka': 44, 'Media Markt': 113,
    'Drogerie Polskie': 111, 'Action': 66, 'Empik Foto': 115, 'Api Market': 49, 'Tedi': 124, 'Auchan': 52,
    'Avon': 127, 'Black Red White': 129, 'Euro Sklep': 135, 'Groszek': 136, 'Hebe': 99, 'Rossmann': 60,
    'Woolworth': 132, 'Drogerie Koliber': 128, '3W': 139, 'Dr.Max Drogeria': 105, 'Gemini.pl': 143, 'Kamrat': 141,
    'PSB Mrówka': 142, 'Poczta Polska': 147, 'Glovo': 146, 'Chorten': 144, 'Dnipro-M': 149, 'Media Star': 150,
    'Jula': 151, 'Douglas': 152, 'Delio': 153, 'Limonka': 154, 'Prymus AGD / Home&Deco': 155, 'Super Prezenty': 156, 'Avia': 157,
}

# List of brand names to be queried, in the mapping's insertion order.
brands = list(brand_id_mapping)


# Birth-year brackets used for the demographic breakdown. Each tuple is sent
# as the "between" value of the `user_birthdate` profile filter.
# NOTE(review): the age labels below only hold for reference year 2024
# (e.g. 2024 - 2006 = 18); the brackets need updating for other years.
age_ranges = [
    (2000, 2006),  # ages 18-24
    (1990, 1999),  # ages 25-34
    (1980, 1989),  # ages 35-44
    (1970, 1979),  # ages 45-54
    (1924, 1969),  # ages 55-100
]

# Gender values sent as the "Gender" demographic filter
genders = ["Male", "Female"]

# Accumulators filled by the collection loop: one row dict per brand with
# data, and the names of brands whose counts were all zero (both are lists).
results, skipped_brands = [], []

# Collect, for every brand, the number of "leaflet_enter" events broken down
# by gender and age bracket, and accumulate one row per brand in `results`.

# Endpoint used both to submit a count query and to poll for its result
COUNTS_URL = 'https://eu1.api.clevertap.com/1/counts/events.json'
REQUEST_TIMEOUT_SECONDS = 60   # per HTTP request, so a hung call cannot block forever
POLL_INTERVAL_SECONDS = 5      # wait between polls of a pending query
MAX_POLL_ATTEMPTS = 24         # give up on a single query after ~2 minutes

# Output column name = gender prefix + age-bracket suffix (e.g. "men18to24")
GENDER_COLUMN_PREFIX = {"Male": "men", "Female": "women"}
AGE_COLUMN_SUFFIX = {
    (2000, 2006): "18to24",
    (1990, 1999): "25to34",
    (1980, 1989): "35to44",
    (1970, 1979): "45to54",
    (1924, 1969): "55",
}


def _fetch_event_count(payload):
    """Submit one event-count query and return its final count.

    The query is POSTed to ``COUNTS_URL``. If the response already carries a
    count it is returned directly; otherwise the request id from the response
    is polled every ``POLL_INTERVAL_SECONDS`` until a count is available.

    NOTE(review): the handled response shapes ("success" with ``count``,
    pending with ``req_id``, "fail") follow the CleverTap Get Event Count
    documentation -- confirm against the current API.

    Args:
        payload: JSON-serialisable dict describing the query.

    Returns:
        The event count reported by the API (0 if a successful response
        carries no ``count`` field).

    Raises:
        requests.HTTPError: if the API answers with an HTTP error status.
        RuntimeError: if the API reports a failure or returns no request id.
        TimeoutError: if the result is still pending after
            ``MAX_POLL_ATTEMPTS`` polls.
    """
    response = requests.post(
        COUNTS_URL,
        headers=headers,
        data=json.dumps(payload),
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    response.raise_for_status()
    body = response.json()
    req_id = None

    for attempt in range(MAX_POLL_ATTEMPTS + 1):
        status = body.get("status")
        if status == "success" or "count" in body:
            return body.get("count", 0)

        req_id = body.get("req_id", req_id)
        if status == "fail" or req_id is None:
            # Surface API errors instead of silently recording a zero count
            raise RuntimeError(f"CleverTap count query failed: {body}")

        if attempt == MAX_POLL_ATTEMPTS:
            break

        # Result not ready yet: wait, then ask again using the request id
        time.sleep(POLL_INTERVAL_SECONDS)
        response = requests.get(
            f'{COUNTS_URL}?req_id={req_id}',
            headers=headers,
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
        response.raise_for_status()
        body = response.json()

    raise TimeoutError(
        f"CleverTap count query {req_id} still pending after "
        f"{MAX_POLL_ATTEMPTS} polls"
    )


# Fail fast: a brand without an ID would otherwise raise KeyError only when
# the loop reaches it, after all the earlier API calls have already been made.
unmapped_brands = [name for name in brands if name not in brand_id_mapping]
if unmapped_brands:
    raise KeyError(f"No brand_id mapping for: {', '.join(unmapped_brands)}")

# Iterate over each brand to collect and process data
for brand in brands:
    # Initialize result structure for each brand with default values of zero
    row = {
        "date": start,
        "brand_id": brand_id_mapping[brand],
        "men18to24": 0, "men25to34": 0, "men35to44": 0, "men45to54": 0, "men55": 0,
        "women18to24": 0, "women25to34": 0, "women35to44": 0, "women45to54": 0, "women55": 0
    }

    # Iterate over each gender and age range to collect counts
    for gender in genders:
        for age_range in age_ranges:
            # Resolve the target column before calling the API so that an
            # unmapped gender / age range raises KeyError instead of having
            # its count silently discarded.
            column = GENDER_COLUMN_PREFIX[gender] + AGE_COLUMN_SUFFIX[tuple(age_range)]

            # Define the data payload for the API request
            data = {
                "event_name": "leaflet_enter",
                "event_properties": [{"name": "brand_name", "operator": "equals", "value": brand}],
                "common_profile_properties": {
                    "profile_fields": [
                        {
                            "name": "user_birthdate",
                            "operator": "between",
                            "value": age_range
                        }
                    ],
                    "demographics": [
                        {
                            "name": "Gender",
                            "value": gender
                        }
                    ]
                },
                "from": start,
                "to": end
            }

            count = _fetch_event_count(data)
            row[column] += count

    # Check if all demographic values are zero for the current brand
    if all(value == 0 for key, value in row.items() if key not in ["date", "brand_id"]):
        # If all values are zero, add brand to skipped list
        skipped_brands.append(brand)
    else:
        # If not all values are zero, add the result to the results list
        results.append(row)

# Turn the per-brand rows into a table and persist it as CSV (no index
# column). Only brands with at least one non-zero count are in `results`.
df = pd.DataFrame(results)
df.to_csv('demografia_plik_do_admina.csv', index=False)

# To inspect the table interactively, print `df` here.

# Report which brands were left out because every count was zero
if not skipped_brands:
    print('Nie pominięto żadnych brandów.')
else:
    print('Brandy, które zostały pominięte, ponieważ wszystkie dane były zerowe:')
    print(', '.join(skipped_brands))



Wyniki dla wszystkich brandów:
       date  brand_id  men18to24  men25to34  men35to44  men45to54  men55  \
0  20240601         3      10155      24248      32943      20788  28116   

   women18to24  women25to34  women35to44  women45to54  women55  
0        25401       102395        99218        61928    71093  
Brandy, które zostały pominięte, ponieważ wszystkie dane były zerowe:
3W
