In [4]:
import os
import json
import pandas as pd
 
directory = "F:\\d-drive\\C.P\\pulse-master\\data"
 
# Function to rename sub-directories
def rename(directory):
    for root, dirs, files in os.walk(directory):
        if 'state' in dirs:
            state_dir = os.path.join(root, "state")
            for state_folder in os.listdir(state_dir):
                old_path = os.path.join(state_dir, state_folder)
                new_path = os.path.join(state_dir, state_folder.title().replace("-", ' ').replace('&', 'and'))
                if not os.path.exists(new_path):
                    os.rename(old_path, new_path)
    print("Renamed all sub-directories successfully")
 
# Function to extract paths of 'state' directories
def extract_paths(directory):
    path_list = []
    for root, dirs, files in os.walk(directory):
        if os.path.basename(root) == 'state':
            path_list.append(root.replace('\\', '/'))
    return path_list
 
rename(directory)
 
state_directories = extract_paths(directory)
 
# Dictionaries to store user and transaction data
map_users = {
    "state": [],
    "year": [],
    "quarter": [],
    "district": [],
    "registeredUsers": [],
    "appOpens": []
}
 
map_transactions = {
    "state": [],
    "year": [],
    "quarter": [],
    "district": [],
    "count": [],
    "amount": []
}
 
agg_trans_dict = {
    'State': [], 'Year': [], 'Quarter': [], 'Transaction_type': [],
    'Transaction_count': [], 'Transaction_amount': []
}
 
agg_user_dict = {
    'State': [], 'Year': [], 'Quarter': [], 'Brand': [],
    'Transaction_count': [], 'Percentage': []
}
 
top_user_dist_dict = {
    'State': [], 'Year': [], 'Quarter': [],
    'District': [], 'Registered_users': []
}
 
top_user_pin_dict = {
    'State': [], 'Year': [], 'Quarter': [],
    'Pincode': [], 'Registered_users': []
}
 
top_trans_pin_dict = {
    'State': [], 'Year': [], 'Quarter': [], 'Pincode': [],
    'Transaction_count': [], 'Transaction_amount': []
}
 
top_trans_dist_dict = {
    'State': [], 'Year': [], 'Quarter': [], 'District': [],
    'Transaction_count': [], 'Transaction_amount': []
}
 
# Main loop to process directories and extract data
for state_path in state_directories:
    state_list = os.listdir(state_path)
    for state in state_list:
        year_path = os.path.join(state_path, state)
        if os.path.isdir(year_path):
            year_list = os.listdir(year_path)
 
            for year in year_list:
                quarter_path = os.path.join(year_path, year)
                if os.path.isdir(quarter_path):
                    quarter_list = os.listdir(quarter_path)
 
                    for quarter in quarter_list:
                        json_path = os.path.join(quarter_path, quarter)
                        if json_path.endswith(".json"):
                            try:
                                with open(json_path, "r") as content:
                                    data = json.load(content)
 
                                # Extract hover data for map_users
                                hover_data = data['data'].get('hoverData', {})
                                for district, info in hover_data.items():
                                    map_users['district'].append(district)
                                    map_users['registeredUsers'].append(info.get("registeredUsers", 0))
                                    map_users['appOpens'].append(info.get("appOpens", 0))
                                    map_users["state"].append(state)
                                    map_users["year"].append(year)
                                    map_users["quarter"].append(quarter.strip('.json'))
 
                            except:
                                pass
 
                            try:
                                # Extract hover data for map_transactions
                                hover_data = data['data'].get('hoverDataList', [])
                                for item in hover_data:
                                    district = item["name"]
                                    metrics = item["metric"]
                                    count = metrics[0]["count"]
                                    amount = metrics[0]["amount"]
                                    map_transactions['district'].append(district)
                                    map_transactions['count'].append(count)
                                    map_transactions['amount'].append(amount)
                                    map_transactions["state"].append(state)
                                    map_transactions["year"].append(year)
                                    map_transactions["quarter"].append(quarter.strip('.json'))
 
                            except:
                                pass
 
                            try:
                                # Extract users by device for agg_user_dict
                                users_by_device = data['data'].get('usersByDevice', [])
                                for device in users_by_device:
                                    brand = device['brand']
                                    count = device['count']
                                    percentage = device['percentage']
                                    agg_user_dict['State'].append(state)
                                    agg_user_dict['Year'].append(year)
                                    agg_user_dict['Quarter'].append(quarter.strip('.json'))
                                    agg_user_dict['Brand'].append(brand)
                                    agg_user_dict['Transaction_count'].append(count)
                                    agg_user_dict['Percentage'].append(percentage)
 
                            except:
                                pass
 
                            try:
                                # Extract district data for top_user_dist_dict and top_trans_dist_dict
                                districts = data['data'].get('districts', [])
                                for district in districts:
                                    name = district.get('name') or district.get('entityName')
                                    registeredUsers = district.get('registeredUsers')
                                    if registeredUsers is not None:
                                        top_user_dist_dict['State'].append(state)
                                        top_user_dist_dict['Year'].append(year)
                                        top_user_dist_dict['Quarter'].append(quarter.strip('.json'))
                                        top_user_dist_dict['District'].append(name)
                                        top_user_dist_dict['Registered_users'].append(registeredUsers)
                                    else:
                                        count = district['metric']['count']
                                        amount = district['metric']['amount']
                                        top_trans_dist_dict['State'].append(state)
                                        top_trans_dist_dict['Year'].append(year)
                                        top_trans_dist_dict['Quarter'].append(quarter.strip('.json'))
                                        top_trans_dist_dict['District'].append(name)
                                        top_trans_dist_dict['Transaction_count'].append(count)
                                        top_trans_dist_dict['Transaction_amount'].append(amount)
 
                            except:
                                pass
 
                            try:
                                # Extract pincode data for top_user_pin_dict and top_trans_pin_dict
                                pincodes = data['data'].get('pincodes', [])
                                for pincode in pincodes:
                                    name = pincode.get('name') or pincode.get('entityName')
                                    registeredUsers = pincode.get('registeredUsers')
                                    if registeredUsers is not None:
                                        top_user_pin_dict['State'].append(state)
                                        top_user_pin_dict['Year'].append(year)
                                        top_user_pin_dict['Quarter'].append(quarter.strip('.json'))
                                        top_user_pin_dict['Pincode'].append(name)
                                        top_user_pin_dict['Registered_users'].append(registeredUsers)
                                    else:
                                        count = pincode['metric']['count']
                                        amount = pincode['metric']['amount']
                                        top_trans_pin_dict['State'].append(state)
                                        top_trans_pin_dict['Year'].append(year)
                                        top_trans_pin_dict['Quarter'].append(quarter.strip('.json'))
                                        top_trans_pin_dict['Pincode'].append(name)
                                        top_trans_pin_dict['Transaction_count'].append(count)
                                        top_trans_pin_dict['Transaction_amount'].append(amount)
 
                            except:
                                pass
 
                            try:
                                # Extract aggregated transaction data for agg_trans_dict
                                transaction_data = data['data'].get('transactionData', [])
                                for transaction in transaction_data:
                                    transaction_type = transaction['name']
                                    payment_instruments = transaction['paymentInstruments']
                                    for instrument in payment_instruments:
                                        count = instrument['count']
                                        amount = instrument['amount']
                                        agg_trans_dict['State'].append(state)
                                        agg_trans_dict['Year'].append(year)
                                        agg_trans_dict['Quarter'].append(quarter.strip('.json'))
                                        agg_trans_dict['Transaction_type'].append(transaction_type)
                                        agg_trans_dict['Transaction_count'].append(count)
                                        agg_trans_dict['Transaction_amount'].append(amount)
                            except:
                                pass
 
 

Renamed all sub-directories successfully


In [5]:
df1 = pd.DataFrame(map_users)
print(df1)
df1.to_csv('map_users.csv', index=False)

                             state  year quarter  \
0      Andaman And Nicobar Islands  2018       1   
1      Andaman And Nicobar Islands  2018       1   
2      Andaman And Nicobar Islands  2018       1   
3      Andaman And Nicobar Islands  2018       2   
4      Andaman And Nicobar Islands  2018       2   
...                            ...   ...     ...   
19027                  West Bengal  2024       2   
19028                  West Bengal  2024       2   
19029                  West Bengal  2024       2   
19030                  West Bengal  2024       2   
19031                  West Bengal  2024       2   

                                district  registeredUsers  appOpens  
0      north and middle andaman district              632         0  
1                 south andaman district             5846         0  
2                      nicobars district              262         0  
3      north and middle andaman district              911         0  
4                 south a

In [6]:
df2 = pd.DataFrame(map_transactions)
print(df2)
df2.to_csv('map_transactions.csv', index=False)







                             state  year quarter  \
0      Andaman And Nicobar Islands  2020       2   
1      Andaman And Nicobar Islands  2020       2   
2      Andaman And Nicobar Islands  2020       3   
3      Andaman And Nicobar Islands  2020       3   
4      Andaman And Nicobar Islands  2020       3   
...                            ...   ...     ...   
31314                  West Bengal  2024       2   
31315                  West Bengal  2024       2   
31316                  West Bengal  2024       2   
31317                  West Bengal  2024       2   
31318                  West Bengal  2024       2   

                                district     count        amount  
0                 south andaman district         3  7.950000e+02  
1                      nicobars district         3  5.650000e+02  
2      north and middle andaman district         1  2.810000e+02  
3                 south andaman district        35  1.365100e+04  
4                      nicobars district

In [7]:
df3 = pd.DataFrame(agg_trans_dict)
print(df3)
df3.to_csv('agg_trans_dict.csv', index=False)




                            State  Year Quarter          Transaction_type  \
0     Andaman And Nicobar Islands  2020       2                 Insurance   
1     Andaman And Nicobar Islands  2020       3                 Insurance   
2     Andaman And Nicobar Islands  2020       4                 Insurance   
3     Andaman And Nicobar Islands  2021       1                 Insurance   
4     Andaman And Nicobar Islands  2021       2                 Insurance   
...                           ...   ...     ...                       ...   
5279                  West Bengal  2024       2         Merchant payments   
5280                  West Bengal  2024       2     Peer-to-peer payments   
5281                  West Bengal  2024       2  Recharge & bill payments   
5282                  West Bengal  2024       2        Financial Services   
5283                  West Bengal  2024       2                    Others   

      Transaction_count  Transaction_amount  
0                     6      

In [8]:
df4 = pd.DataFrame(top_user_dist_dict)
print(df4)
df4.to_csv('top_user_dist_dict.csv', index=False)





                            State  Year Quarter                  District  \
0     Andaman and Nicobar Islands  2018       1             south andaman   
1     Andaman and Nicobar Islands  2018       1  north and middle andaman   
2     Andaman and Nicobar Islands  2018       1                  nicobars   
3     Andaman and Nicobar Islands  2018       2             south andaman   
4     Andaman and Nicobar Islands  2018       2  north and middle andaman   
...                           ...   ...     ...                       ...   
7691                  West Bengal  2024       2                    howrah   
7692                  West Bengal  2024       2                     nadia   
7693                  West Bengal  2024       2           purba medinipur   
7694                  West Bengal  2024       2         paschim medinipur   
7695                  West Bengal  2024       2           purba bardhaman   

      Registered_users  
0                 5846  
1                  632  


In [9]:
df5 = pd.DataFrame(top_user_pin_dict)
print(df5)
df5.to_csv('top_user_pin_dict.csv', index=False)






                            State  Year Quarter Pincode  Registered_users
0     Andaman and Nicobar Islands  2018       1  744103              1608
1     Andaman and Nicobar Islands  2018       1  744101              1108
2     Andaman and Nicobar Islands  2018       1  744105              1075
3     Andaman and Nicobar Islands  2018       1  744102              1006
4     Andaman and Nicobar Islands  2018       1  744104               272
...                           ...   ...     ...     ...               ...
9277                  West Bengal  2024       2  700015            136835
9278                  West Bengal  2024       2  721101            132872
9279                  West Bengal  2024       2  742304            131799
9280                  West Bengal  2024       2  700150            128721
9281                  West Bengal  2024       2  700091            127531

[9282 rows x 5 columns]


In [10]:
df6 = pd.DataFrame(top_trans_pin_dict)
print(df6)
df6.to_csv('top_trans_pin_dict.csv', index=False)





                             State  Year Quarter Pincode  Transaction_count  \
0      Andaman and Nicobar Islands  2020       2  744301                  3   
1      Andaman and Nicobar Islands  2020       2  744104                  2   
2      Andaman and Nicobar Islands  2020       2  744101                  1   
3      Andaman and Nicobar Islands  2020       3  744112                  9   
4      Andaman and Nicobar Islands  2020       3  744105                  7   
...                            ...   ...     ...     ...                ...   
15231                  West Bengal  2024       2  721301            5404541   
15232                  West Bengal  2024       2  732201            5321216   
15233                  West Bengal  2024       2  700006            4851340   
15234                  West Bengal  2024       2  742202            4779726   
15235                  West Bengal  2024       2  700001            4678132   

       Transaction_amount  
0            5.650000e+

In [11]:


df7 = pd.DataFrame(top_trans_dist_dict)
print(df7)
df7.to_csv('top_trans_dist_dict.csv', index=False)



                             State  Year Quarter                  District  \
0      Andaman and Nicobar Islands  2020       2                  nicobars   
1      Andaman and Nicobar Islands  2020       2             south andaman   
2      Andaman and Nicobar Islands  2020       3             south andaman   
3      Andaman and Nicobar Islands  2020       3                  nicobars   
4      Andaman and Nicobar Islands  2020       3  north and middle andaman   
...                            ...   ...     ...                       ...   
12698                  West Bengal  2024       2                    maldah   
12699                  West Bengal  2024       2                     nadia   
12700                  West Bengal  2024       2                    howrah   
12701                  West Bengal  2024       2         paschim medinipur   
12702                  West Bengal  2024       2                   hooghly   

       Transaction_count  Transaction_amount  
0               

In [12]:
df8 = pd.DataFrame(agg_user_dict)
print(df8)
df8.to_csv('agg_user_dict.csv', index=False)

                            State  Year Quarter    Brand  Transaction_count  \
0     Andaman And Nicobar Islands  2018       1   Xiaomi               1665   
1     Andaman And Nicobar Islands  2018       1  Samsung               1445   
2     Andaman And Nicobar Islands  2018       1     Vivo                982   
3     Andaman And Nicobar Islands  2018       1     Oppo                501   
4     Andaman And Nicobar Islands  2018       1  OnePlus                332   
...                           ...   ...     ...      ...                ...   
6727                  West Bengal  2022       1   Lenovo             330017   
6728                  West Bengal  2022       1  Infinix             284678   
6729                  West Bengal  2022       1     Asus             280347   
6730                  West Bengal  2022       1    Apple             277752   
6731                  West Bengal  2022       1   Others            2196334   

      Percentage  
0       0.247033  
1       0.214