# PhonePe Pulse

The Indian digital payments story has truly captured the world's imagination. From the largest towns to the remotest villages, there is a payments revolution being driven by the penetration of mobile phones, mobile internet and state-of-the-art payments infrastructure built as Public Goods championed by the central bank and the government. Founded in December 2015, PhonePe has been a strong beneficiary of the API driven digitization of payments in India. When we started, we were constantly looking for granular and definitive data sources on digital payments in India. PhonePe Pulse is our way of giving back to the digital payments ecosystem.

In [1]:
# Loading the python packages
import pandas as pd
import numpy as np
import json
import os

In [2]:
# Folder names (slugs) used for every state / union territory in the data set,
# listed in the order the readable names should appear in the dictionary.
_state_slugs = (
    'andaman-&-nicobar-islands',
    'andhra-pradesh',
    'arunachal-pradesh',
    'assam',
    'bihar',
    'chandigarh',
    'chhattisgarh',
    'dadra-&-nagar-haveli-&-daman-&-diu',
    'delhi',
    'goa',
    'gujarat',
    'haryana',
    'himachal-pradesh',
    'jammu-&-kashmir',
    'jharkhand',
    'karnataka',
    'kerala',
    'ladakh',
    'lakshadweep',
    'madhya-pradesh',
    'maharashtra',
    'manipur',
    'meghalaya',
    'mizoram',
    'nagaland',
    'odisha',
    'puducherry',
    'punjab',
    'rajasthan',
    'sikkim',
    'tamil-nadu',
    'telangana',
    'tripura',
    'uttar-pradesh',
    'uttarakhand',
    'west-bengal',
)

# Slug -> readable name: hyphens become spaces and every word is capitalised,
# e.g. 'jammu-&-kashmir' -> 'Jammu & Kashmir'.
state_dict = {slug: slug.replace('-', ' ').title() for slug in _state_slugs}

print(state_dict)

{'andaman-&-nicobar-islands': 'Andaman & Nicobar Islands', 'andhra-pradesh': 'Andhra Pradesh', 'arunachal-pradesh': 'Arunachal Pradesh', 'assam': 'Assam', 'bihar': 'Bihar', 'chandigarh': 'Chandigarh', 'chhattisgarh': 'Chhattisgarh', 'dadra-&-nagar-haveli-&-daman-&-diu': 'Dadra & Nagar Haveli & Daman & Diu', 'delhi': 'Delhi', 'goa': 'Goa', 'gujarat': 'Gujarat', 'haryana': 'Haryana', 'himachal-pradesh': 'Himachal Pradesh', 'jammu-&-kashmir': 'Jammu & Kashmir', 'jharkhand': 'Jharkhand', 'karnataka': 'Karnataka', 'kerala': 'Kerala', 'ladakh': 'Ladakh', 'lakshadweep': 'Lakshadweep', 'madhya-pradesh': 'Madhya Pradesh', 'maharashtra': 'Maharashtra', 'manipur': 'Manipur', 'meghalaya': 'Meghalaya', 'mizoram': 'Mizoram', 'nagaland': 'Nagaland', 'odisha': 'Odisha', 'puducherry': 'Puducherry', 'punjab': 'Punjab', 'rajasthan': 'Rajasthan', 'sikkim': 'Sikkim', 'tamil-nadu': 'Tamil Nadu', 'telangana': 'Telangana', 'tripura': 'Tripura', 'uttar-pradesh': 'Uttar Pradesh', 'uttarakhand': 'Uttarakhand', '

# Aggregated Data

## Insurance

### Insurance by Aggregated Values

Extracting the aggregated insurance values. This data frame shows the insurance transaction values for each year, broken down by quarter. The data we can collect from it are: <br/>
- Name : Insurance <br/>
- Count : No of Transaction <br/>
- Amount : Amount of Transactions in INR <br/>
- Year : Year <br/>
- Quarter : Quarter <br/>
- Average Transaction : Average amount per transaction in each Quarter (Total Amount / Count)

In [3]:
# Collect the country-level insurance aggregates into one row per record,
# reading every <path>/<year>/<quarter>.json file.
path = "Phonepe-pulse-master/data/aggregated/insurance/country/india/"
# os.listdir returns entries in arbitrary order; sort so the rows are chronological
years = sorted(os.listdir(path))

# Column name -> list of values; every record appends exactly one value to each list
data = {
    "Year": [],
    "Quarter": [],
    "Name": [],
    "Count": [],
    "Amount": [],
    "Average Transaction": []  # average transaction value per quarter
}

for j in years:
    p_j = os.path.join(path, j)
    if not os.path.isdir(p_j):
        continue  # ignore stray files next to the year folders

    for k in sorted(os.listdir(p_j)):
        # Quarter files are named like "1.json". splitext removes only the
        # extension, whereas str.strip(".json") strips a *set of characters*
        # from both ends of the name.
        stem, ext = os.path.splitext(k)
        if ext != ".json" or not stem.isdecimal():
            continue
        quarter = "Q" + str(int(stem))

        # The context manager closes the file handle as soon as it is parsed
        with open(os.path.join(p_j, k), "r", encoding="utf-8") as fh:
            d = json.load(fh)

        # Skip files whose "transactionData" section is missing or null
        try:
            records = d["data"]["transactionData"] or []
        except (KeyError, TypeError):
            continue

        for i in records:
            # Work out every value before appending, so a malformed record
            # can never leave the column lists with different lengths
            try:
                Name = i["name"]
                count = i["paymentInstruments"][0]["count"]
                amount = i["paymentInstruments"][0]["amount"]
                average = amount / count
                row = (j, quarter, Name, count, float(amount), float(average))
            except (KeyError, IndexError, TypeError, ValueError, ZeroDivisionError):
                continue  # leave out only the incomplete record

            for column, value in zip(data, row):
                data[column].append(value)

# Creating a Data frame
agg_insurance = pd.DataFrame(data)
# Checking the top 5 rows
agg_insurance.head()

Unnamed: 0,Year,Quarter,Name,Count,Amount,Average Transaction
0,2020,Q2,Insurance,185348,33732166.0,181.993688
1,2020,Q3,Insurance,354284,89495076.0,252.608292
2,2020,Q4,Insurance,248626,170979933.0,687.699328
3,2021,Q1,Insurance,318119,206307024.0,648.521541
4,2021,Q2,Insurance,363989,295066678.0,810.647239


In [4]:
# Create a CSV file for the data frame.
# DataFrame.to_csv does not create missing folders, so make sure the target exists.
os.makedirs("Data/aggregated", exist_ok=True)
agg_insurance.to_csv("Data/aggregated/agg_insurance.csv", index=False)

Now we create a data frame of the aggregated insurance values for each Indian state. These are the data points we can collect: <br/>
- Name : Insurance <br/>
- Count : No of Transaction <br/>
- Amount : Amount of Transactions in INR <br/>
- State name: Indian State Name <br/>
- Year : Year <br/>
- Quarter : Quarter <br/>
- Average Transaction : Average amount per transaction in each Quarter (Total Amount / Count)

In [5]:
# Aggregated insurance values by state: one row per record found in
# <path>/<state>/<year>/<quarter>.json.
path = "Phonepe-pulse-master/data/aggregated/insurance/country/state/"
# os.listdir returns entries in arbitrary order; sort for a reproducible row order
states = sorted(os.listdir(path))

# Column name -> list of values; every record appends exactly one value to each list
data = {
    "State": [],
    "Year": [],
    "Quarter": [],
    "Name": [],
    "Count": [],
    "Amount": [],
    "Average Transaction": []
}

for i in states:
    p_i = os.path.join(path, i)
    if not os.path.isdir(p_i):
        continue  # ignore stray files next to the state folders
    years = sorted(os.listdir(p_i))  # year folders of this state

    for j in years:
        p_j = os.path.join(p_i, j)
        if not os.path.isdir(p_j):
            continue

        for k in sorted(os.listdir(p_j)):
            # Quarter files are named like "1.json". splitext removes only the
            # extension, whereas str.strip(".json") strips a *set of
            # characters* from both ends of the name.
            stem, ext = os.path.splitext(k)
            if ext != ".json" or not stem.isdecimal():
                continue
            quarter = "Q" + str(int(stem))

            # The context manager closes the file handle once it is parsed
            with open(os.path.join(p_j, k), "r", encoding="utf-8") as fh:
                d = json.load(fh)

            # Skip files whose "transactionData" section is missing or null
            try:
                records = d["data"]["transactionData"] or []
            except (KeyError, TypeError):
                continue

            for z in records:
                # Work out every value before appending, so a malformed record
                # can never leave the column lists with different lengths
                try:
                    name = z["name"]
                    count = z["paymentInstruments"][0]["count"]
                    amount = z["paymentInstruments"][0]["amount"]
                    average = amount / count
                except (KeyError, IndexError, TypeError, ZeroDivisionError):
                    continue  # leave out only the incomplete record

                data["State"].append(i)
                data["Year"].append(j)
                data["Quarter"].append(quarter)
                data["Name"].append(name)
                data["Count"].append(count)
                data["Amount"].append(amount)
                data["Average Transaction"].append(average)

# Creating a Data frame
state_insurance = pd.DataFrame(data)

# Checking the top 5 rows
state_insurance.head()

Unnamed: 0,State,Year,Quarter,Name,Count,Amount,Average Transaction
0,andaman-&-nicobar-islands,2020,Q2,Insurance,6,1360.0,226.666667
1,andaman-&-nicobar-islands,2020,Q3,Insurance,41,15380.0,375.121951
2,andaman-&-nicobar-islands,2020,Q4,Insurance,124,157975.0,1273.991935
3,andaman-&-nicobar-islands,2021,Q1,Insurance,225,244266.0,1085.626667
4,andaman-&-nicobar-islands,2021,Q2,Insurance,137,181504.0,1324.846715


In [6]:
# Data Transformation: swap the folder-style state slugs for readable names
state_insurance["State"] = state_insurance["State"].replace(state_dict)

# Create a CSV file for the data frame.
# DataFrame.to_csv does not create missing folders, so make sure the target exists.
os.makedirs("Data/aggregated", exist_ok=True)
state_insurance.to_csv("Data/aggregated/state_agg_insurance.csv", index=False)
state_insurance.head()

Unnamed: 0,State,Year,Quarter,Name,Count,Amount,Average Transaction
0,Andaman & Nicobar Islands,2020,Q2,Insurance,6,1360.0,226.666667
1,Andaman & Nicobar Islands,2020,Q3,Insurance,41,15380.0,375.121951
2,Andaman & Nicobar Islands,2020,Q4,Insurance,124,157975.0,1273.991935
3,Andaman & Nicobar Islands,2021,Q1,Insurance,225,244266.0,1085.626667
4,Andaman & Nicobar Islands,2021,Q2,Insurance,137,181504.0,1324.846715


## Transactions

### Transactions by Aggregated Values
Extracting the aggregated values, broken down by transaction type. The data frame will include the following:
<br/>
- Year: Year <br/>
- Quarter: Quarter <br/>
- Name : Name of the transaction <br/>
- Count: No of Transaction <br/>
- Amount: Total Amount <br/>
- Average Transaction : Average Transaction


In [7]:
# Aggregated Transaction Data: one row per transaction category found in
# <path>/<year>/<quarter>.json.
path = "Phonepe-pulse-master/data/aggregated/transaction/country/india/"

# os.listdir returns entries in arbitrary order; sort so the rows are chronological
Years = sorted(os.listdir(path))

# Column name -> list of values; every record appends exactly one value to each list
data = {
    "Year": [],
    "Quarter": [],
    "Name": [],
    "Count": [],
    "Amount": [],
    "Average Transaction": []
}

for j in Years:
    p_j = os.path.join(path, j)
    if not os.path.isdir(p_j):
        continue  # ignore stray files next to the year folders

    for k in sorted(os.listdir(p_j)):
        # Quarter files are named like "1.json". splitext removes only the
        # extension, whereas str.strip(".json") strips a *set of characters*
        # from both ends of the name.
        stem, ext = os.path.splitext(k)
        if ext != ".json" or not stem.isdecimal():
            continue
        quarter = "Q" + str(int(stem))

        # The context manager closes the file handle as soon as it is parsed
        with open(os.path.join(p_j, k), "r", encoding="utf-8") as fh:
            d = json.load(fh)

        # Skip files whose "transactionData" section is missing or null
        try:
            records = d["data"]["transactionData"] or []
        except (KeyError, TypeError):
            continue

        for z in records:
            # Work out every value before appending, so a malformed record
            # can never leave the column lists with different lengths
            try:
                name = z["name"]
                count = z["paymentInstruments"][0]["count"]
                amount = z["paymentInstruments"][0]["amount"]
                average = amount / count
            except (KeyError, IndexError, TypeError, ZeroDivisionError):
                continue  # leave out only the incomplete record

            data["Year"].append(j)
            data["Quarter"].append(quarter)
            data["Name"].append(name)
            data["Count"].append(count)
            data["Amount"].append(amount)
            data["Average Transaction"].append(average)

# Creating a Data frame
agg_transaction = pd.DataFrame(data)

# Checking the top 5 rows
agg_transaction.head()

Unnamed: 0,Year,Quarter,Name,Count,Amount,Average Transaction
0,2018,Q1,Recharge & bill payments,72550406,14472710000.0,199.484942
1,2018,Q1,Peer-to-peer payments,46982705,147245900000.0,3134.044401
2,2018,Q1,Merchant payments,5368669,4656679000.0,867.380521
3,2018,Q1,Financial Services,3762820,815853100.0,216.819594
4,2018,Q1,Others,5761576,4643217000.0,805.893613


In [8]:
# Create a CSV file for the data frame.
# DataFrame.to_csv does not create missing folders, so make sure the target exists.
os.makedirs("Data/aggregated", exist_ok=True)
agg_transaction.to_csv("Data/aggregated/agg_transaction.csv", index=False)

### Transaction by States

Extracting the aggregated values, broken down by transaction type, for each state. The data frame will include the following:
<br/>
- States: state in india <br/>
- Year: Year <br/>
- Quarter: Quarter <br/>
- Name : Name of the transaction <br/>
- Count: No of Transaction <br/>
- Amount: Total Amount <br/>
- Average Transaction : Average Transaction


In [9]:
# Aggregated Transaction Data by State: one row per transaction category found
# in <path>/<state>/<year>/<quarter>.json.
path = "Phonepe-pulse-master/data/aggregated/transaction/country/state/"
# os.listdir returns entries in arbitrary order; sort for a reproducible row order
states = sorted(os.listdir(path))

# Column name -> list of values; every record appends exactly one value to each list
data = {
    "State": [],
    "Year": [],
    "Quarter": [],
    "Name": [],
    "Count": [],
    "Amount": [],
    "Average Transaction": []
}

for i in states:
    p_i = os.path.join(path, i)
    if not os.path.isdir(p_i):
        continue  # ignore stray files next to the state folders
    years = sorted(os.listdir(p_i))  # year folders of this state

    for j in years:
        p_j = os.path.join(p_i, j)
        if not os.path.isdir(p_j):
            continue

        for k in sorted(os.listdir(p_j)):
            # Quarter files are named like "1.json". splitext removes only the
            # extension, whereas str.strip(".json") strips a *set of
            # characters* from both ends of the name.
            stem, ext = os.path.splitext(k)
            if ext != ".json" or not stem.isdecimal():
                continue
            quarter = "Q" + str(int(stem))

            # The context manager closes the file handle once it is parsed
            with open(os.path.join(p_j, k), "r", encoding="utf-8") as fh:
                d = json.load(fh)

            # Skip files whose "transactionData" section is missing or null
            try:
                records = d["data"]["transactionData"] or []
            except (KeyError, TypeError):
                continue

            for z in records:
                # Work out every value before appending, so a malformed record
                # can never leave the column lists with different lengths
                try:
                    name = z["name"]
                    count = z["paymentInstruments"][0]["count"]
                    amount = z["paymentInstruments"][0]["amount"]
                    average = amount / count
                except (KeyError, IndexError, TypeError, ZeroDivisionError):
                    continue  # leave out only the incomplete record

                data["State"].append(i)
                data["Year"].append(j)
                data["Quarter"].append(quarter)
                data["Name"].append(name)
                data["Count"].append(count)
                data["Amount"].append(amount)
                data["Average Transaction"].append(average)

# Creating a Data frame
state_transaction = pd.DataFrame(data)

# Checking the top 5 rows
state_transaction.head()

Unnamed: 0,State,Year,Quarter,Name,Count,Amount,Average Transaction
0,andaman-&-nicobar-islands,2018,Q1,Recharge & bill payments,4200,1845307.0,439.358921
1,andaman-&-nicobar-islands,2018,Q1,Peer-to-peer payments,1871,12138660.0,6487.790112
2,andaman-&-nicobar-islands,2018,Q1,Merchant payments,298,452507.2,1518.480432
3,andaman-&-nicobar-islands,2018,Q1,Financial Services,33,10601.42,321.255149
4,andaman-&-nicobar-islands,2018,Q1,Others,256,184689.9,721.44479


In [10]:
# Data Transformation: swap the folder-style state slugs for readable names
state_transaction["State"] = state_transaction["State"].replace(state_dict)

# Create a CSV file for the data frame.
# DataFrame.to_csv does not create missing folders, so make sure the target exists.
os.makedirs("Data/aggregated", exist_ok=True)
state_transaction.to_csv("Data/aggregated/state_agg_transaction.csv", index=False)

## Users

### Users by Aggregated Values 

Extracting the aggregated values for users, broken down by phone brand. The data frame will include the following:
- Year : Year
- Quarter : Quarter
- Brand : Phone brands
- Count : User Count
- App Open Percentage : Percentage value reported for the brand

In [11]:
# Aggregated Users Data: one row per phone brand found in
# <path>/<year>/<quarter>.json.
path = "Phonepe-pulse-master/data/aggregated/user/country/india/"
# List the year folders of *this* path. The loop below must iterate this list
# and not a `years` variable left over from an earlier cell.
# os.listdir returns entries in arbitrary order; sort so the rows are chronological.
years = sorted(os.listdir(path))

# Column name -> list of values; every record appends exactly one value to each list
data = {
    "Year": [],
    "Quarter": [],
    "Brand": [],
    "Count": [],
    "App Open Percentage": []
}

for j in years:
    p_j = os.path.join(path, j)
    if not os.path.isdir(p_j):
        continue  # ignore stray files next to the year folders

    for k in sorted(os.listdir(p_j)):
        # Quarter files are named like "1.json". splitext removes only the
        # extension, whereas str.strip(".json") strips a *set of characters*
        # from both ends of the name.
        stem, ext = os.path.splitext(k)
        if ext != ".json" or not stem.isdecimal():
            continue
        quarter = "Q" + str(int(stem))

        # The context manager closes the file handle as soon as it is parsed
        with open(os.path.join(p_j, k), "r", encoding="utf-8") as fh:
            d = json.load(fh)

        # Skip files whose "usersByDevice" section is missing or null
        try:
            records = d["data"]["usersByDevice"] or []
        except (KeyError, TypeError):
            continue

        for z in records:
            # Read every value before appending, so a malformed record
            # can never leave the column lists with different lengths
            try:
                name = z["brand"]
                count = z["count"]
                percentage = z["percentage"]
            except (KeyError, TypeError):
                continue  # leave out only the incomplete record

            data["Year"].append(j)
            data["Quarter"].append(quarter)
            data["Brand"].append(name)
            data["Count"].append(count)
            data["App Open Percentage"].append(percentage)

# Creating a Data frame
agg_Users = pd.DataFrame(data)

# Checking the top 5 rows
agg_Users.head()

Unnamed: 0,Year,Quarter,Brand,Count,App Open Percentage
0,2018,Q1,Xiaomi,11926334,0.254413
1,2018,Q1,Samsung,9609401,0.204988
2,2018,Q1,Vivo,5894293,0.125737
3,2018,Q1,Oppo,4479351,0.095554
4,2018,Q1,Realme,2376866,0.050703


In [12]:

# Create a CSV file for the data frame.
# DataFrame.to_csv does not create missing folders, so make sure the target exists.
os.makedirs("Data/aggregated", exist_ok=True)
agg_Users.to_csv("Data/aggregated/agg_Users.csv", index=False)

### Users by State

Extracting the aggregated user values, broken down by phone brand, for each state. The data frame will include the following:
<br/>
- State: state in india <br/>
- Year : Year<br/>
- Quarter : Quarter<br/>
- Brand : Phone brands<br/>
- Count : User Count<br/>
- App Open Percentage : Percentage value reported for the brand<br/>


In [13]:
# Aggregated Users Data by State: one row per phone brand found in
# <path>/<state>/<year>/<quarter>.json.
path = "Phonepe-pulse-master/data/aggregated/user/country/state/"
# os.listdir returns entries in arbitrary order; sort for a reproducible row order
states = sorted(os.listdir(path))

# Column name -> list of values; every record appends exactly one value to each list
data = {
    "State": [],
    "Year": [],
    "Quarter": [],
    "Brand": [],
    "Count": [],
    "App Open Percentage": []
}

for i in states:
    p_i = os.path.join(path, i)
    if not os.path.isdir(p_i):
        continue  # ignore stray files next to the state folders
    years = sorted(os.listdir(p_i))  # year folders of this state

    for j in years:
        p_j = os.path.join(p_i, j)
        if not os.path.isdir(p_j):
            continue

        for k in sorted(os.listdir(p_j)):
            # Quarter files are named like "1.json". splitext removes only the
            # extension, whereas str.strip(".json") strips a *set of
            # characters* from both ends of the name.
            stem, ext = os.path.splitext(k)
            if ext != ".json" or not stem.isdecimal():
                continue
            quarter = "Q" + str(int(stem))

            # The context manager closes the file handle once it is parsed
            with open(os.path.join(p_j, k), "r", encoding="utf-8") as fh:
                d = json.load(fh)

            # Skip files whose "usersByDevice" section is missing or null
            try:
                records = d["data"]["usersByDevice"] or []
            except (KeyError, TypeError):
                continue

            for z in records:
                # Read every value before appending, so a malformed record
                # can never leave the column lists with different lengths
                try:
                    name = z["brand"]
                    count = z["count"]
                    percentage = z["percentage"]
                except (KeyError, TypeError):
                    continue  # leave out only the incomplete record

                data["State"].append(i)
                data["Year"].append(j)
                data["Quarter"].append(quarter)
                data["Brand"].append(name)
                data["Count"].append(count)
                data["App Open Percentage"].append(percentage)

# Creating a Data frame
state_Users = pd.DataFrame(data)

# Checking the top 5 rows
state_Users.head()

Unnamed: 0,State,Year,Quarter,Brand,Count,App Open Percentage
0,andaman-&-nicobar-islands,2018,Q1,Xiaomi,1665,0.247033
1,andaman-&-nicobar-islands,2018,Q1,Samsung,1445,0.214392
2,andaman-&-nicobar-islands,2018,Q1,Vivo,982,0.145697
3,andaman-&-nicobar-islands,2018,Q1,Oppo,501,0.074332
4,andaman-&-nicobar-islands,2018,Q1,OnePlus,332,0.049258


In [14]:
# Data Transformation: swap the folder-style state slugs for readable names
state_Users["State"] = state_Users["State"].replace(state_dict)

# Create a CSV file for the data frame.
# DataFrame.to_csv does not create missing folders, so make sure the target exists.
os.makedirs("Data/aggregated", exist_ok=True)
state_Users.to_csv("Data/aggregated/state_agg_Users.csv", index=False)

state_Users.head()

Unnamed: 0,State,Year,Quarter,Brand,Count,App Open Percentage
0,Andaman & Nicobar Islands,2018,Q1,Xiaomi,1665,0.247033
1,Andaman & Nicobar Islands,2018,Q1,Samsung,1445,0.214392
2,Andaman & Nicobar Islands,2018,Q1,Vivo,982,0.145697
3,Andaman & Nicobar Islands,2018,Q1,Oppo,501,0.074332
4,Andaman & Nicobar Islands,2018,Q1,OnePlus,332,0.049258


# Map

### Insurance by Map

Extracting the coordinates, state and amount from the map data:
<br/>
- State: state in india <br/>
- Year : Year<br/>
- Quarter : Quarter<br/>
- Latitude : Latitude<br/>
- Longitude : Longitude<br/>
- Amount : Amount<br/>

In [15]:
# Map insurance Data: one row per map point found in
# <path>/<year>/<quarter>.json.
path = "Phonepe-pulse-master/data/map/insurance/country/india/"

# os.listdir returns entries in arbitrary order; sort so the rows are chronological
years = sorted(os.listdir(path))

# Column name -> list of values; every record appends exactly one value to each list
data = {
    "Year": [],
    "Quarter": [],
    "State": [],
    "Latitude": [],
    "Longitude": [],
    "Amount": []
}

for j in years:
    p_j = os.path.join(path, j)
    if not os.path.isdir(p_j):
        continue  # ignore stray files next to the year folders

    for k in sorted(os.listdir(p_j)):
        # Quarter files are named like "1.json". splitext removes only the
        # extension, whereas str.strip(".json") strips a *set of characters*
        # from both ends of the name.
        stem, ext = os.path.splitext(k)
        if ext != ".json" or not stem.isdecimal():
            continue
        quarter = "Q" + str(int(stem))

        # The context manager closes the file handle as soon as it is parsed
        with open(os.path.join(p_j, k), "r", encoding="utf-8") as fh:
            d = json.load(fh)

        # Skip files whose nested "data" -> "data" -> "data" section is missing or null
        try:
            records = d["data"]["data"]["data"] or []
        except (KeyError, TypeError):
            continue

        for z in records:
            # Each record is indexed positionally: 0 and 1 are stored as
            # latitude and longitude, 2 as the amount, 3 as the state label.
            # Read all four before appending so a short record cannot leave
            # the column lists with different lengths.
            try:
                lat = z[0]
                long = z[1]
                metric = z[2]
                state = z[3]
            except (IndexError, KeyError, TypeError):
                continue  # leave out only the incomplete record

            data["State"].append(state)
            data["Year"].append(j)
            data["Quarter"].append(quarter)
            data["Latitude"].append(lat)
            data["Longitude"].append(long)
            data["Amount"].append(metric)

# Creating a Data frame
insurance_map = pd.DataFrame(data)

# Checking the top 5 rows
insurance_map.head()

Unnamed: 0,Year,Quarter,State,Latitude,Longitude,Amount
0,2020,Q2,karnataka,12.881175,77.567674,4720.0
1,2020,Q2,telangana,17.428197,78.389911,3186.0
2,2020,Q2,karnataka,12.967107,77.475933,2753.0
3,2020,Q2,telangana,17.340345,78.480878,2674.0
4,2020,Q2,karnataka,12.88555,77.659339,2408.0


In [16]:

# Data Transformation: swap state labels that match a state_dict key for readable names.
# NOTE(review): state_dict keys are hyphenated slugs; confirm the map data spells
# multi-word states the same way, otherwise those rows stay unchanged.
insurance_map["State"] = insurance_map["State"].replace(state_dict)

# Create a CSV file for the data frame.
# DataFrame.to_csv does not create missing folders, so make sure the target exists.
os.makedirs("Data/map", exist_ok=True)
insurance_map.to_csv("Data/map/map_insurance.csv", index=False)
insurance_map.head()

Unnamed: 0,Year,Quarter,State,Latitude,Longitude,Amount
0,2020,Q2,Karnataka,12.881175,77.567674,4720.0
1,2020,Q2,Telangana,17.428197,78.389911,3186.0
2,2020,Q2,Karnataka,12.967107,77.475933,2753.0
3,2020,Q2,Telangana,17.340345,78.480878,2674.0
4,2020,Q2,Karnataka,12.88555,77.659339,2408.0


### Insurance by State Map

Extracting the coordinates, state, district and amount from the state-level map data:
<br/>
- State: state in india <br/>
- District: District <br/>
- Year : Year<br/>
- Quarter : Quarter<br/>
- Latitude : Latitude<br/>
- Longitude : Longitude<br/>
- Amount : Amount<br/>

In [17]:
### Insurance by Map by states: one row per map point found in
### <path>/<state>/<year>/<quarter>.json.
path = "Phonepe-pulse-master/data/map/insurance/country/state/"
# os.listdir returns entries in arbitrary order; sort for a reproducible row order
states = sorted(os.listdir(path))

# Column name -> list of values; every record appends exactly one value to each list
data = {
    "State": [],
    "District": [],
    "Year": [],
    "Quarter": [],
    "Latitude": [],
    "Longitude": [],
    "Amount": []
}

for i in states:
    p_i = os.path.join(path, i)
    if not os.path.isdir(p_i):
        continue  # ignore stray files next to the state folders
    years = sorted(os.listdir(p_i))  # year folders of this state

    for j in years:
        p_j = os.path.join(p_i, j)
        if not os.path.isdir(p_j):
            continue

        for k in sorted(os.listdir(p_j)):
            # Quarter files are named like "1.json". splitext removes only the
            # extension, whereas str.strip(".json") strips a *set of
            # characters* from both ends of the name.
            stem, ext = os.path.splitext(k)
            if ext != ".json" or not stem.isdecimal():
                continue
            quarter = "Q" + str(int(stem))

            # The context manager closes the file handle once it is parsed
            with open(os.path.join(p_j, k), "r", encoding="utf-8") as fh:
                d = json.load(fh)

            # Skip files whose nested "data" -> "data" -> "data" section is missing or null
            try:
                records = d["data"]["data"]["data"] or []
            except (KeyError, TypeError):
                continue

            for z in records:
                # Each record is indexed positionally: 0 and 1 are stored as
                # latitude and longitude, 2 as the amount, 3 as the district
                # label. Read and clean all four before appending so a bad
                # record cannot leave the column lists with different lengths.
                try:
                    lat = z[0]
                    long = z[1]
                    metric = z[2]
                    district = z[3].replace(" district", "")
                except (IndexError, KeyError, TypeError, AttributeError):
                    continue  # leave out only the incomplete record

                data["State"].append(i)
                data["District"].append(district)
                data["Year"].append(j)
                data["Quarter"].append(quarter)
                data["Latitude"].append(lat)
                data["Longitude"].append(long)
                data["Amount"].append(metric)

# Creating a Data frame
state_insurance_map = pd.DataFrame(data)

# Checking the top 5 rows
state_insurance_map.head()

Unnamed: 0,State,District,Year,Quarter,Latitude,Longitude,Amount
0,andaman-&-nicobar-islands,nicobars,2020,Q2,9.17349,92.812846,3.0
1,andaman-&-nicobar-islands,south andaman,2020,Q2,11.665257,92.753094,2.0
2,andaman-&-nicobar-islands,south andaman,2020,Q2,11.665446,92.733193,1.0
3,andaman-&-nicobar-islands,south andaman,2020,Q3,11.653981,92.743142,8.0
4,andaman-&-nicobar-islands,south andaman,2020,Q3,11.654075,92.723246,8.0


In [18]:
# Data Transformation: readable state names and title-cased district names
state_insurance_map["State"] = state_insurance_map["State"].replace(state_dict)
state_insurance_map["District"] = state_insurance_map["District"].str.title()

# Create a CSV file for the data frame.
# DataFrame.to_csv does not create missing folders, so make sure the target exists.
os.makedirs("Data/map", exist_ok=True)
state_insurance_map.to_csv("Data/map/state_map_insurance.csv", index=False)
state_insurance_map.head()

Unnamed: 0,State,District,Year,Quarter,Latitude,Longitude,Amount
0,Andaman & Nicobar Islands,Nicobars,2020,Q2,9.17349,92.812846,3.0
1,Andaman & Nicobar Islands,South Andaman,2020,Q2,11.665257,92.753094,2.0
2,Andaman & Nicobar Islands,South Andaman,2020,Q2,11.665446,92.733193,1.0
3,Andaman & Nicobar Islands,South Andaman,2020,Q3,11.653981,92.743142,8.0
4,Andaman & Nicobar Islands,South Andaman,2020,Q3,11.654075,92.723246,8.0


## Map - Insurance - Hover
This data provides the hover values for insurance payments in each state
<br/>
- States: States <br/>
- Year : Year<br/>
- Quarter : Quarter<br/>
- Count : Count<br/>
- Amount : Amount<br/>

In [19]:
### Insurance by Map Hover: one row per hover entry found in
### <path>/<year>/<quarter>.json.
path = "Phonepe-pulse-master/data/map/insurance/hover/country/india/"

# List the year folders of *this* path. The loop below must iterate this list
# and not a `years` variable left over from an earlier cell.
# os.listdir returns entries in arbitrary order; sort so the rows are chronological.
years = sorted(os.listdir(path))

# Column name -> list of values; every record appends exactly one value to each list
data = {
    "Year": [],
    "Quarter": [],
    "State": [],
    "Count": [],
    "Amount": [],
    "Average Transaction": []
}

for j in years:
    p_j = os.path.join(path, j)
    if not os.path.isdir(p_j):
        continue  # ignore stray files next to the year folders

    for k in sorted(os.listdir(p_j)):
        # Quarter files are named like "1.json". splitext removes only the
        # extension, whereas str.strip(".json") strips a *set of characters*
        # from both ends of the name.
        stem, ext = os.path.splitext(k)
        if ext != ".json" or not stem.isdecimal():
            continue
        quarter = "Q" + str(int(stem))

        # The context manager closes the file handle as soon as it is parsed
        with open(os.path.join(p_j, k), "r", encoding="utf-8") as fh:
            d = json.load(fh)

        # Skip files whose "hoverDataList" section is missing or null
        try:
            records = d["data"]["hoverDataList"] or []
        except (KeyError, TypeError):
            continue

        for z in records:
            # Work out every value before appending, so a malformed record
            # can never leave the column lists with different lengths
            try:
                name = z["name"]
                count = z["metric"][0]["count"]
                amount = z["metric"][0]["amount"]
                transaction = amount / count
            except (KeyError, IndexError, TypeError, ZeroDivisionError):
                continue  # leave out only the incomplete record

            data["Year"].append(j)
            data["Quarter"].append(quarter)
            data["State"].append(name)
            data["Count"].append(count)
            data["Amount"].append(amount)
            data["Average Transaction"].append(transaction)

# Creating a Data frame
insurance_hover = pd.DataFrame(data)

# Checking the top 5 rows
insurance_hover.head()

Unnamed: 0,Year,Quarter,State,Count,Amount,Average Transaction
0,2020,Q2,puducherry,112,22251.0,198.669643
1,2020,Q2,tamil nadu,5473,1075552.0,196.519642
2,2020,Q2,uttar pradesh,9884,1912266.0,193.470862
3,2020,Q2,madhya pradesh,6283,1198701.0,190.784816
4,2020,Q2,andhra pradesh,22104,3982391.0,180.166079


In [20]:
# Data Transformation: title-case the state names
insurance_hover["State"] = insurance_hover["State"].str.title()

# Create a CSV file for the data frame.
# DataFrame.to_csv does not create missing folders, so make sure the target exists.
os.makedirs("Data/map/hover", exist_ok=True)
insurance_hover.to_csv("Data/map/hover/insurance_hover.csv", index=False)
# Checking the top 5 rows
insurance_hover.head()

Unnamed: 0,Year,Quarter,State,Count,Amount,Average Transaction
0,2020,Q2,Puducherry,112,22251.0,198.669643
1,2020,Q2,Tamil Nadu,5473,1075552.0,196.519642
2,2020,Q2,Uttar Pradesh,9884,1912266.0,193.470862
3,2020,Q2,Madhya Pradesh,6283,1198701.0,190.784816
4,2020,Q2,Andhra Pradesh,22104,3982391.0,180.166079


## Map - Insurance - States - Hover
This data provides the hover values for insurance payments in each district, grouped by state
<br/>
- States: States<br/>
- District: District<br/>
- Year : Year<br/>
- Quarter : Quarter<br/>
- Count : Count<br/>
- Amount : Amount<br/>

In [21]:
### Insurance by Map Hover by states: one row per hover entry found in
### <path>/<state>/<year>/<quarter>.json.
path = "Phonepe-pulse-master/data/map/insurance/hover/country/state/"

# os.listdir returns entries in arbitrary order; sort for a reproducible row order
states = sorted(os.listdir(path))

# Column name -> list of values; every record appends exactly one value to each list
data = {
    "State": [],
    "District": [],
    "Year": [],
    "Quarter": [],
    "Count": [],
    "Amount": [],
    "Average Transaction": []
}

for i in states:
    p_i = os.path.join(path, i)
    if not os.path.isdir(p_i):
        continue  # ignore stray files next to the state folders
    years = sorted(os.listdir(p_i))  # year folders of this state

    for j in years:
        p_j = os.path.join(p_i, j)
        if not os.path.isdir(p_j):
            continue

        for k in sorted(os.listdir(p_j)):
            # Quarter files are named like "1.json". splitext removes only the
            # extension, whereas str.strip(".json") strips a *set of
            # characters* from both ends of the name.
            stem, ext = os.path.splitext(k)
            if ext != ".json" or not stem.isdecimal():
                continue
            quarter = "Q" + str(int(stem))

            # The context manager closes the file handle once it is parsed
            with open(os.path.join(p_j, k), "r", encoding="utf-8") as fh:
                d = json.load(fh)

            # Skip files whose "hoverDataList" section is missing or null
            try:
                records = d["data"]["hoverDataList"] or []
            except (KeyError, TypeError):
                continue

            for z in records:
                # Work out every value before appending, so a malformed record
                # can never leave the column lists with different lengths
                try:
                    district = z["name"].replace(" district", "")
                    count = z["metric"][0]["count"]
                    amount = z["metric"][0]["amount"]
                    transaction = amount / count
                except (KeyError, IndexError, TypeError, AttributeError,
                        ZeroDivisionError):
                    continue  # leave out only the incomplete record

                data["State"].append(i)
                data["District"].append(district)
                data["Year"].append(j)
                data["Quarter"].append(quarter)
                data["Count"].append(count)
                data["Amount"].append(amount)
                data["Average Transaction"].append(transaction)

# Creating a Data frame
insurance_state_hover = pd.DataFrame(data)

# Checking the top 5 rows
insurance_state_hover.head()
            

Unnamed: 0,State,District,Year,Quarter,Count,Amount,Average Transaction
0,andaman-&-nicobar-islands,south andaman,2020,Q2,3,795.0,265.0
1,andaman-&-nicobar-islands,nicobars,2020,Q2,3,565.0,188.333333
2,andaman-&-nicobar-islands,north and middle andaman,2020,Q3,1,281.0,281.0
3,andaman-&-nicobar-islands,south andaman,2020,Q3,35,13651.0,390.028571
4,andaman-&-nicobar-islands,nicobars,2020,Q3,5,1448.0,289.6


In [22]:
# Data Transformation: readable state names and title-cased district names
insurance_state_hover["State"] = insurance_state_hover["State"].replace(state_dict)
insurance_state_hover["District"] = insurance_state_hover["District"].str.title()

# Create a CSV file for the data frame.
# DataFrame.to_csv does not create missing folders, so make sure the target exists.
os.makedirs("Data/map/hover", exist_ok=True)
insurance_state_hover.to_csv("Data/map/hover/insurance_state_hover.csv", index=False)
# Checking the top 5 rows
insurance_state_hover.head()

Unnamed: 0,State,District,Year,Quarter,Count,Amount,Average Transaction
0,Andaman & Nicobar Islands,South Andaman,2020,Q2,3,795.0,265.0
1,Andaman & Nicobar Islands,Nicobars,2020,Q2,3,565.0,188.333333
2,Andaman & Nicobar Islands,North And Middle Andaman,2020,Q3,1,281.0,281.0
3,Andaman & Nicobar Islands,South Andaman,2020,Q3,35,13651.0,390.028571
4,Andaman & Nicobar Islands,Nicobars,2020,Q3,5,1448.0,289.6


## Map - Transaction  - Hover
This data provides the hover values for transactions in each state
<br/>
- States: States<br/>
- Year : Year<br/>
- Quarter : Quarter<br/>
- Count : Count<br/>
- Amount : Amount<br/>

In [23]:
### Transaction by Map Hover: one row per hover entry found in
### <path>/<year>/<quarter>.json.
path = "Phonepe-pulse-master/data/map/transaction/hover/country/india/"

# os.listdir returns entries in arbitrary order; sort so the rows are chronological
years = sorted(os.listdir(path))

# Column name -> list of values; every record appends exactly one value to each list
data = {
    "Year": [],
    "Quarter": [],
    "State": [],
    "Count": [],
    "Amount": [],
    "Average Transaction": []
}

for j in years:
    p_j = os.path.join(path, j)
    if not os.path.isdir(p_j):
        continue  # ignore stray files next to the year folders

    for k in sorted(os.listdir(p_j)):
        # Quarter files are named like "1.json". splitext removes only the
        # extension, whereas str.strip(".json") strips a *set of characters*
        # from both ends of the name.
        stem, ext = os.path.splitext(k)
        if ext != ".json" or not stem.isdecimal():
            continue
        quarter = "Q" + str(int(stem))

        # The context manager closes the file handle as soon as it is parsed
        with open(os.path.join(p_j, k), "r", encoding="utf-8") as fh:
            d = json.load(fh)

        # Skip files whose "hoverDataList" section is missing or null
        try:
            records = d["data"]["hoverDataList"] or []
        except (KeyError, TypeError):
            continue

        for z in records:
            # Work out every value before appending, so a malformed record
            # can never leave the column lists with different lengths
            try:
                name = z["name"]
                count = z["metric"][0]["count"]
                amount = z["metric"][0]["amount"]
                transaction = amount / count
            except (KeyError, IndexError, TypeError, ZeroDivisionError):
                continue  # leave out only the incomplete record

            data["Year"].append(j)
            data["Quarter"].append(quarter)
            data["State"].append(name)
            data["Count"].append(count)
            data["Amount"].append(amount)
            data["Average Transaction"].append(transaction)

# Creating a Data frame
transaction_hover = pd.DataFrame(data)

# Checking the top 5 rows
transaction_hover.head()

Unnamed: 0,Year,Quarter,State,Count,Amount,Average Transaction
0,2018,Q1,puducherry,104212,165826000.0,1591.236819
1,2018,Q1,tamil nadu,6726622,11261560000.0,1674.177276
2,2018,Q1,uttar pradesh,12537805,13939970000.0,1111.83504
3,2018,Q1,madhya pradesh,8025395,8681603000.0,1081.766493
4,2018,Q1,andhra pradesh,9039585,11996280000.0,1327.082647


In [24]:
# Data Transformation: title-case the state names
transaction_hover["State"] = transaction_hover["State"].str.title()

# Create a CSV file for the data frame.
# DataFrame.to_csv does not create missing folders, so make sure the target exists.
os.makedirs("Data/map/hover", exist_ok=True)
transaction_hover.to_csv("Data/map/hover/transaction_hover.csv", index=False)

# Checking the top 5 rows
transaction_hover.head()

Unnamed: 0,Year,Quarter,State,Count,Amount,Average Transaction
0,2018,Q1,Puducherry,104212,165826000.0,1591.236819
1,2018,Q1,Tamil Nadu,6726622,11261560000.0,1674.177276
2,2018,Q1,Uttar Pradesh,12537805,13939970000.0,1111.83504
3,2018,Q1,Madhya Pradesh,8025395,8681603000.0,1081.766493
4,2018,Q1,Andhra Pradesh,9039585,11996280000.0,1327.082647


## Map - Transaction - States - Hover
This data provides the hover values for transactions in each district, grouped by state
<br/>
- States: States<br/>
- District: District<br/>
- Year : Year<br/>
- Quarter : Quarter<br/>
- Count : Count<br/>
- Amount : Amount<br/>

In [25]:
### transaction by Map Hover by states: one row per hover entry found in
### <path>/<state>/<year>/<quarter>.json.
path = "Phonepe-pulse-master/data/map/transaction/hover/country/state/"

# os.listdir returns entries in arbitrary order; sort for a reproducible row order
states = sorted(os.listdir(path))

# Column name -> list of values; every record appends exactly one value to each list
data = {
    "State": [],
    "District": [],
    "Year": [],
    "Quarter": [],
    "Count": [],
    "Amount": [],
    "Average Transaction": []
}

for i in states:
    p_i = os.path.join(path, i)
    if not os.path.isdir(p_i):
        continue  # ignore stray files next to the state folders
    years = sorted(os.listdir(p_i))  # year folders of this state

    for j in years:
        p_j = os.path.join(p_i, j)
        if not os.path.isdir(p_j):
            continue

        for k in sorted(os.listdir(p_j)):
            # Quarter files are named like "1.json". splitext removes only the
            # extension, whereas str.strip(".json") strips a *set of
            # characters* from both ends of the name.
            stem, ext = os.path.splitext(k)
            if ext != ".json" or not stem.isdecimal():
                continue
            quarter = "Q" + str(int(stem))

            # The context manager closes the file handle once it is parsed
            with open(os.path.join(p_j, k), "r", encoding="utf-8") as fh:
                d = json.load(fh)

            # Skip files whose "hoverDataList" section is missing or null
            try:
                records = d["data"]["hoverDataList"] or []
            except (KeyError, TypeError):
                continue

            for z in records:
                # Work out every value before appending, so a malformed record
                # can never leave the column lists with different lengths
                try:
                    district = z["name"].replace(" district", "")
                    count = z["metric"][0]["count"]
                    amount = z["metric"][0]["amount"]
                    transaction = amount / count
                except (KeyError, IndexError, TypeError, AttributeError,
                        ZeroDivisionError):
                    continue  # leave out only the incomplete record

                data["State"].append(i)
                data["District"].append(district)
                data["Year"].append(j)
                data["Quarter"].append(quarter)
                data["Count"].append(count)
                data["Amount"].append(amount)
                data["Average Transaction"].append(transaction)

# Creating a Data frame
Transaction_state_hover = pd.DataFrame(data)

# Checking the top 5 rows
Transaction_state_hover.head()

Unnamed: 0,State,District,Year,Quarter,Count,Amount,Average Transaction
0,andaman-&-nicobar-islands,north and middle andaman,2018,Q1,442,931663.1,2107.835016
1,andaman-&-nicobar-islands,south andaman,2018,Q1,5688,12560250.0,2208.201361
2,andaman-&-nicobar-islands,nicobars,2018,Q1,528,1139849.0,2158.804548
3,andaman-&-nicobar-islands,north and middle andaman,2018,Q2,825,1317863.0,1597.409798
4,andaman-&-nicobar-islands,south andaman,2018,Q2,9395,23948240.0,2549.040502


In [26]:
# Data Transformation
# Folder-style state keys -> display names (via state_dict); districts -> Title Case
readable_states = Transaction_state_hover["State"].replace(state_dict)
titled_districts = Transaction_state_hover["District"].str.title()
Transaction_state_hover["State"] = readable_states
Transaction_state_hover["District"] = titled_districts

# Save the cleaned data frame as a CSV file (row index left out)
transaction_state_hover_csv = "Data/map/hover/transaction_state_hover.csv"
Transaction_state_hover.to_csv(transaction_state_hover_csv, index=False)

# Preview the first 5 rows
Transaction_state_hover.head(5)

Unnamed: 0,State,District,Year,Quarter,Count,Amount,Average Transaction
0,Andaman & Nicobar Islands,North And Middle Andaman,2018,Q1,442,931663.1,2107.835016
1,Andaman & Nicobar Islands,South Andaman,2018,Q1,5688,12560250.0,2208.201361
2,Andaman & Nicobar Islands,Nicobars,2018,Q1,528,1139849.0,2158.804548
3,Andaman & Nicobar Islands,North And Middle Andaman,2018,Q2,825,1317863.0,1597.409798
4,Andaman & Nicobar Islands,South Andaman,2018,Q2,9395,23948240.0,2549.040502


## Map - Users  - Hover
This data will generate the hover data for users in each state
<br/>
- States: States<br/>
- Year : Year<br/>
- Quarter : Quarter<br/>
- RegisteredUsers : Registered Users<br/>
- Appopens : App opens<br/>
- Average Opens: Average Opens per users

In [27]:
### Users by Map Hover
# Reads every <year>/<quarter>.json file below `path` and flattens the
# data.hoverData mapping into one row per state, year and quarter, adding the
# average number of app opens per registered user.
path = "Phonepe-pulse-master/data/map/user/hover/country/india/"

# creating an empty dictionary to collect all the data
data = {
    "Year": [],
    "Quarter": [],
    "State": [],
    "Registered Users": [],
    "App Opens": [],
    "Average Opens": [],
}

# path of Years (sorted because os.listdir returns entries in arbitrary order)
years = sorted(os.listdir(path))

# opening the Years folders
for j in years:
    p_j = path + j + "/"
    quarters = sorted(os.listdir(p_j))

    # Quarters files
    for k in quarters:
        p_k = p_j + k

        # "1.json" -> "Q1". splitext drops only the extension, whereas
        # str.strip(".json") removes any of those characters from both ends.
        quarter = "Q" + str(int(os.path.splitext(k)[0]))

        # the context manager closes the file; a bare open() leaked one
        # handle per JSON file
        with open(p_k, "r") as Data:
            d = json.load(Data)

        # Extract hover data; a missing or null section contributes no rows
        try:
            hover_data = d["data"]["hoverData"] or {}
        except (KeyError, TypeError):
            hover_data = {}

        # Loop through hover data and populate data dictionary
        for state, info in hover_data.items():
            # read every field before appending, so the column lists can
            # never end up with different lengths
            try:
                users = info["registeredUsers"]
                opens = info["appOpens"]
                # zero registered users gives NaN instead of a ZeroDivisionError
                average_opens = opens / users if users else float("nan")
            except (KeyError, TypeError):
                # skip only the malformed record, not the rest of the file
                continue

            data["State"].append(state)
            data["Registered Users"].append(users)
            data["App Opens"].append(opens)
            data["Average Opens"].append(average_opens)
            data["Year"].append(j)
            data["Quarter"].append(quarter)

# Creating a Data frame
Users_hover = pd.DataFrame(data)

# Checking the top 5 rows
Users_hover.head()
    

Unnamed: 0,Year,Quarter,State,Registered Users,App Opens,Average Opens
0,2018,Q1,puducherry,49318,0,0.0
1,2018,Q1,tamil nadu,2104754,0,0.0
2,2018,Q1,uttar pradesh,4694250,0,0.0
3,2018,Q1,madhya pradesh,2553603,0,0.0
4,2018,Q1,andhra pradesh,3336450,0,0.0


In [28]:
# Data Transformation: capitalise each word of the state names
title_case_states = Users_hover["State"].str.title()
Users_hover["State"] = title_case_states

# Save the cleaned data frame as a CSV file (row index left out)
users_hover_csv = "Data/map/hover/Users_hover.csv"
Users_hover.to_csv(users_hover_csv, index=False)

# Preview the first 5 rows
Users_hover.head(5)

Unnamed: 0,Year,Quarter,State,Registered Users,App Opens,Average Opens
0,2018,Q1,Puducherry,49318,0,0.0
1,2018,Q1,Tamil Nadu,2104754,0,0.0
2,2018,Q1,Uttar Pradesh,4694250,0,0.0
3,2018,Q1,Madhya Pradesh,2553603,0,0.0
4,2018,Q1,Andhra Pradesh,3336450,0,0.0


## Map - Users - States - Hover
This data will generate the hover data for users in each district of every state
<br/>
- States: States<br/>
- District: District<br/>
- Year : Year<br/>
- Quarter : Quarter<br/>
- RegisteredUsers : Registered Users<br/>
- Appopens : App opens<br/>
- Average Opens: Average Opens per users

In [29]:
### Users by Map Hover
# Reads every <state>/<year>/<quarter>.json file below `path` and flattens the
# data.hoverData mapping into one row per state, district, year and quarter,
# adding the average number of app opens per registered user.
path = "Phonepe-pulse-master/data/map/user/hover/country/state/"

# creating an empty dictionary to collect all the data
data = {
    "Year": [],
    "Quarter": [],
    "State": [],
    "District": [],
    "Registered Users": [],
    "App Opens": [],
    "Average Opens": [],
}

# path of States (sorted because os.listdir returns entries in arbitrary order)
states = sorted(os.listdir(path))

# opening states
for i in states:
    p_i = path + i + "/"
    years = sorted(os.listdir(p_i))

    # opening years
    for j in years:
        p_j = p_i + j + "/"
        quarters = sorted(os.listdir(p_j))

        # opening Quarters
        for k in quarters:
            p_k = p_j + k

            # "1.json" -> "Q1". splitext drops only the extension, whereas
            # str.strip(".json") removes any of those characters from both ends.
            quarter = "Q" + str(int(os.path.splitext(k)[0]))

            # the context manager closes the file; a bare open() leaked one
            # handle per JSON file
            with open(p_k, "r") as Data:
                d = json.load(Data)

            # Extract hover data; a missing or null section contributes no rows
            try:
                hover_data = d["data"]["hoverData"] or {}
            except (KeyError, TypeError):
                hover_data = {}

            # Loop through hover data and populate data dictionary
            for district, info in hover_data.items():
                # read every field before appending, so the column lists can
                # never end up with different lengths
                try:
                    users = info["registeredUsers"]
                    opens = info["appOpens"]
                    # zero registered users gives NaN instead of a ZeroDivisionError
                    average_opens = opens / users if users else float("nan")
                except (KeyError, TypeError):
                    # skip only the malformed record, not the rest of the file
                    continue

                data["State"].append(i)
                data["District"].append(district.replace(" district", ""))
                data["Registered Users"].append(users)
                data["App Opens"].append(opens)
                data["Average Opens"].append(average_opens)
                data["Year"].append(j)
                data["Quarter"].append(quarter)

# Creating a Data frame
Users_states_hover = pd.DataFrame(data)

# Checking the top 5 rows
Users_states_hover.head()

Unnamed: 0,Year,Quarter,State,District,Registered Users,App Opens,Average Opens
0,2018,Q1,andaman-&-nicobar-islands,north and middle andaman,632,0,0.0
1,2018,Q1,andaman-&-nicobar-islands,south andaman,5846,0,0.0
2,2018,Q1,andaman-&-nicobar-islands,nicobars,262,0,0.0
3,2018,Q2,andaman-&-nicobar-islands,north and middle andaman,911,0,0.0
4,2018,Q2,andaman-&-nicobar-islands,south andaman,8143,0,0.0


In [30]:
# Data Transformation
# Folder-style state keys -> display names (via state_dict); districts -> Title Case
readable_states = Users_states_hover["State"].replace(state_dict)
titled_districts = Users_states_hover["District"].str.title()
Users_states_hover["State"] = readable_states
Users_states_hover["District"] = titled_districts

# Save the cleaned data frame as a CSV file (row index left out)
users_states_hover_csv = "Data/map/hover/Users_states_hover.csv"
Users_states_hover.to_csv(users_states_hover_csv, index=False)

# Preview the first 5 rows
Users_states_hover.head(5)

Unnamed: 0,Year,Quarter,State,District,Registered Users,App Opens,Average Opens
0,2018,Q1,Andaman & Nicobar Islands,North And Middle Andaman,632,0,0.0
1,2018,Q1,Andaman & Nicobar Islands,South Andaman,5846,0,0.0
2,2018,Q1,Andaman & Nicobar Islands,Nicobars,262,0,0.0
3,2018,Q2,Andaman & Nicobar Islands,North And Middle Andaman,911,0,0.0
4,2018,Q2,Andaman & Nicobar Islands,South Andaman,8143,0,0.0


## Top - Insurance - States
This data lists the top pincodes and the top districts of each state by insurance transactions
<br/>
- States: States<br/>
- Pincode: Pincode<br/>
- Year : Year<br/>
- Quarter : Quarter<br/>
- count : Count of transaction<br/>
- amount : amount<br/>

In [31]:
### Top insurance by pincode
# Reads every <state>/<year>/<quarter>.json file below `path` and flattens the
# entries of data.pincodes into one row per state, pincode, year and quarter,
# adding the average amount per transaction.
path = "Phonepe-pulse-master/data/top/insurance/country/state/"

# creating an empty dictionary to collect all the data
data = {
    "Year": [],
    "Quarter": [],
    "State": [],
    "Pincode": [],
    "Count": [],
    "Amount": [],
    "Average Transaction": [],
}

# path of States (sorted because os.listdir returns entries in arbitrary order)
states = sorted(os.listdir(path))

# opening states
for i in states:
    p_i = path + i + "/"
    # opening years
    years = sorted(os.listdir(p_i))

    for j in years:
        p_j = p_i + j + "/"
        # opening Quarters
        quarters = sorted(os.listdir(p_j))

        for k in quarters:
            p_k = p_j + k

            # "1.json" -> "Q1". splitext drops only the extension, whereas
            # str.strip(".json") removes any of those characters from both ends.
            quarter = "Q" + str(int(os.path.splitext(k)[0]))

            # the context manager closes the file; a bare open() leaked one
            # handle per JSON file
            with open(p_k, "r") as Data:
                d = json.load(Data)

            # a missing or null section simply contributes no rows
            try:
                records = d["data"]["pincodes"] or []
            except (KeyError, TypeError):
                records = []

            for z in records:
                # read every field before appending, so the column lists can
                # never end up with different lengths
                try:
                    pincode = z["entityName"]
                    count = z["metric"]["count"]
                    amount = z["metric"]["amount"]
                    # a zero count gives NaN instead of a ZeroDivisionError
                    average = amount / count if count else float("nan")
                except (KeyError, TypeError):
                    # skip only the malformed record, not the rest of the file
                    continue

                # filling the dataset
                data["Year"].append(j)
                data["Quarter"].append(quarter)
                data["State"].append(i)
                data["Pincode"].append(pincode)
                data["Count"].append(count)
                data["Amount"].append(amount)
                data["Average Transaction"].append(average)

# Creating a Data frame
Top_Insurance = pd.DataFrame(data)

# Checking the top 5 rows
Top_Insurance.head()

Unnamed: 0,Year,Quarter,State,Pincode,Count,Amount,Average Transaction
0,2020,Q2,andaman-&-nicobar-islands,744301,3,565.0,188.333333
1,2020,Q2,andaman-&-nicobar-islands,744104,2,513.0,256.5
2,2020,Q2,andaman-&-nicobar-islands,744101,1,282.0,282.0
3,2020,Q3,andaman-&-nicobar-islands,744112,9,3432.0,381.333333
4,2020,Q3,andaman-&-nicobar-islands,744105,7,3948.0,564.0


In [32]:
# Data Transformation: folder-style state keys -> display names (via state_dict)
readable_states = Top_Insurance["State"].replace(state_dict)
Top_Insurance["State"] = readable_states

# Save the cleaned data frame as a CSV file (row index left out)
top_pincode_insurance_csv = "Data/top/Top_Pincode_Insurance.csv"
Top_Insurance.to_csv(top_pincode_insurance_csv, index=False)

# Preview the first 5 rows
Top_Insurance.head(5)

Unnamed: 0,Year,Quarter,State,Pincode,Count,Amount,Average Transaction
0,2020,Q2,Andaman & Nicobar Islands,744301,3,565.0,188.333333
1,2020,Q2,Andaman & Nicobar Islands,744104,2,513.0,256.5
2,2020,Q2,Andaman & Nicobar Islands,744101,1,282.0,282.0
3,2020,Q3,Andaman & Nicobar Islands,744112,9,3432.0,381.333333
4,2020,Q3,Andaman & Nicobar Islands,744105,7,3948.0,564.0


In [33]:

### Top insurance by district
# Reads every <state>/<year>/<quarter>.json file below `path` and flattens the
# entries of data.districts into one row per state, district, year and quarter,
# adding the average amount per transaction.
# NOTE: this rebinds Top_Insurance, replacing the pincode-level frame.
path = "Phonepe-pulse-master/data/top/insurance/country/state/"

# creating an empty dictionary to collect all the data
data = {
    "Year": [],
    "Quarter": [],
    "State": [],
    "District": [],
    "Count": [],
    "Amount": [],
    "Average Transaction": [],
}

# path of States (sorted because os.listdir returns entries in arbitrary order)
states = sorted(os.listdir(path))

for i in states:
    p_i = path + i + "/"

    years = sorted(os.listdir(p_i))

    for j in years:
        p_j = p_i + j + "/"

        quarters = sorted(os.listdir(p_j))

        for k in quarters:
            p_k = p_j + k

            # "1.json" -> "Q1". splitext drops only the extension, whereas
            # str.strip(".json") removes any of those characters from both ends.
            quarter = "Q" + str(int(os.path.splitext(k)[0]))

            # the context manager closes the file; a bare open() leaked one
            # handle per JSON file
            with open(p_k, "r") as Data:
                d = json.load(Data)

            # a missing or null section simply contributes no rows
            try:
                records = d["data"]["districts"] or []
            except (KeyError, TypeError):
                records = []

            for z in records:
                # read every field before appending, so the column lists can
                # never end up with different lengths
                try:
                    district = z["entityName"].replace(" district", "")
                    count = z["metric"]["count"]
                    amount = z["metric"]["amount"]
                    # a zero count gives NaN instead of a ZeroDivisionError
                    average = amount / count if count else float("nan")
                except (KeyError, TypeError, AttributeError):
                    # skip only the malformed record, not the rest of the file
                    continue

                # filling the dataset
                data["Year"].append(j)
                data["Quarter"].append(quarter)
                data["State"].append(i)
                data["District"].append(district)
                data["Count"].append(count)
                data["Amount"].append(amount)
                data["Average Transaction"].append(average)

# Creating a Data frame
Top_Insurance = pd.DataFrame(data)

# Checking the top 5 rows
Top_Insurance.head()

Unnamed: 0,Year,Quarter,State,District,Count,Amount,Average Transaction
0,2020,Q2,andaman-&-nicobar-islands,nicobars,3,565.0,188.333333
1,2020,Q2,andaman-&-nicobar-islands,south andaman,3,795.0,265.0
2,2020,Q3,andaman-&-nicobar-islands,south andaman,35,13651.0,390.028571
3,2020,Q3,andaman-&-nicobar-islands,nicobars,5,1448.0,289.6
4,2020,Q3,andaman-&-nicobar-islands,north and middle andaman,1,281.0,281.0


In [34]:
# Data Transformation
# Folder-style state keys -> display names (via state_dict); districts -> Title Case
readable_states = Top_Insurance["State"].replace(state_dict)
titled_districts = Top_Insurance["District"].str.title()
Top_Insurance["State"] = readable_states
Top_Insurance["District"] = titled_districts

# Save the cleaned data frame as a CSV file (row index left out)
# NOTE(review): this frame has a District column, yet the file is named
# Top_States_*; confirm with downstream readers before renaming it.
top_insurance_csv = "Data/top/Top_States_Insurance.csv"
Top_Insurance.to_csv(top_insurance_csv, index=False)

# Preview the first 5 rows
Top_Insurance.head(5)

Unnamed: 0,Year,Quarter,State,District,Count,Amount,Average Transaction
0,2020,Q2,Andaman & Nicobar Islands,Nicobars,3,565.0,188.333333
1,2020,Q2,Andaman & Nicobar Islands,South Andaman,3,795.0,265.0
2,2020,Q3,Andaman & Nicobar Islands,South Andaman,35,13651.0,390.028571
3,2020,Q3,Andaman & Nicobar Islands,Nicobars,5,1448.0,289.6
4,2020,Q3,Andaman & Nicobar Islands,North And Middle Andaman,1,281.0,281.0


## Top - Transaction - States
This data lists the top pincodes and the top districts of each state by transactions
<br/>
- States: States<br/>
- Pincode: Pincode<br/>
- Year : Year<br/>
- Quarter : Quarter<br/>
- count : Count of transaction<br/>
- amount : amount<br/>

In [35]:
### Top transactions by pincode
# Reads every <state>/<year>/<quarter>.json file below `path` and flattens the
# entries of data.pincodes into one row per state, pincode, year and quarter,
# adding the average amount per transaction.
path = "Phonepe-pulse-master/data/top/transaction/country/state/"

# creating an empty dictionary to collect all the data
data = {
    "Year": [],
    "Quarter": [],
    "State": [],
    "Pincode": [],
    "Count": [],
    "Amount": [],
    "Average Transaction": [],
}

# path of States (sorted because os.listdir returns entries in arbitrary order)
states = sorted(os.listdir(path))

for i in states:
    p_i = path + i + "/"

    years = sorted(os.listdir(p_i))

    for j in years:
        p_j = p_i + j + "/"

        quarters = sorted(os.listdir(p_j))

        for k in quarters:
            p_k = p_j + k

            # "1.json" -> "Q1". splitext drops only the extension, whereas
            # str.strip(".json") removes any of those characters from both ends.
            quarter = "Q" + str(int(os.path.splitext(k)[0]))

            # the context manager closes the file; a bare open() leaked one
            # handle per JSON file
            with open(p_k, "r") as Data:
                d = json.load(Data)

            # a missing or null section simply contributes no rows
            try:
                records = d["data"]["pincodes"] or []
            except (KeyError, TypeError):
                records = []

            for z in records:
                # read every field before appending, so the column lists can
                # never end up with different lengths
                try:
                    pincode = z["entityName"]
                    count = z["metric"]["count"]
                    amount = z["metric"]["amount"]
                    # a zero count gives NaN instead of a ZeroDivisionError
                    average = amount / count if count else float("nan")
                except (KeyError, TypeError):
                    # skip only the malformed record, not the rest of the file
                    continue

                # filling the dataset
                data["Year"].append(j)
                data["Quarter"].append(quarter)
                data["State"].append(i)
                data["Pincode"].append(pincode)
                data["Count"].append(count)
                data["Amount"].append(amount)
                data["Average Transaction"].append(average)

# Creating a Data frame
Top_Transaction = pd.DataFrame(data)

# Checking the top 5 rows
Top_Transaction.head()

Unnamed: 0,Year,Quarter,State,Pincode,Count,Amount,Average Transaction
0,2018,Q1,andaman-&-nicobar-islands,744101,1622,2769298.0,1707.335329
1,2018,Q1,andaman-&-nicobar-islands,744103,1223,2238042.0,1829.960647
2,2018,Q1,andaman-&-nicobar-islands,744102,969,3519060.0,3631.640802
3,2018,Q1,andaman-&-nicobar-islands,744105,685,1298561.0,1895.709418
4,2018,Q1,andaman-&-nicobar-islands,744104,340,1039715.0,3057.986209


In [36]:
# Data Transformation: folder-style state keys -> display names (via state_dict)
readable_states = Top_Transaction["State"].replace(state_dict)
Top_Transaction["State"] = readable_states

# Save the cleaned data frame as a CSV file (row index left out)
top_pincode_transaction_csv = "Data/top/Top_Pincode_Transaction.csv"
Top_Transaction.to_csv(top_pincode_transaction_csv, index=False)

# Preview the first 5 rows
Top_Transaction.head(5)

Unnamed: 0,Year,Quarter,State,Pincode,Count,Amount,Average Transaction
0,2018,Q1,Andaman & Nicobar Islands,744101,1622,2769298.0,1707.335329
1,2018,Q1,Andaman & Nicobar Islands,744103,1223,2238042.0,1829.960647
2,2018,Q1,Andaman & Nicobar Islands,744102,969,3519060.0,3631.640802
3,2018,Q1,Andaman & Nicobar Islands,744105,685,1298561.0,1895.709418
4,2018,Q1,Andaman & Nicobar Islands,744104,340,1039715.0,3057.986209


In [37]:
### Top transactions by district
# Reads every <state>/<year>/<quarter>.json file below `path` and flattens the
# entries of data.districts into one row per state, district, year and quarter,
# adding the average amount per transaction.
# NOTE: this rebinds Top_Transaction, replacing the pincode-level frame.
path = "Phonepe-pulse-master/data/top/transaction/country/state/"

# creating an empty dictionary to collect all the data
data = {
    "Year": [],
    "Quarter": [],
    "State": [],
    "District": [],
    "Count": [],
    "Amount": [],
    "Average Transaction": [],
}

# path of States (sorted because os.listdir returns entries in arbitrary order)
states = sorted(os.listdir(path))

for i in states:
    p_i = path + i + "/"

    years = sorted(os.listdir(p_i))

    for j in years:
        p_j = p_i + j + "/"

        quarters = sorted(os.listdir(p_j))

        for k in quarters:
            p_k = p_j + k

            # "1.json" -> "Q1". splitext drops only the extension, whereas
            # str.strip(".json") removes any of those characters from both ends.
            quarter = "Q" + str(int(os.path.splitext(k)[0]))

            # the context manager closes the file; a bare open() leaked one
            # handle per JSON file
            with open(p_k, "r") as Data:
                d = json.load(Data)

            # a missing or null section simply contributes no rows
            try:
                records = d["data"]["districts"] or []
            except (KeyError, TypeError):
                records = []

            for z in records:
                # read every field before appending, so the column lists can
                # never end up with different lengths
                try:
                    district = z["entityName"].replace(" district", "")
                    count = z["metric"]["count"]
                    amount = z["metric"]["amount"]
                    # a zero count gives NaN instead of a ZeroDivisionError
                    average = amount / count if count else float("nan")
                except (KeyError, TypeError, AttributeError):
                    # skip only the malformed record, not the rest of the file
                    continue

                # filling the dataset
                data["Year"].append(j)
                data["Quarter"].append(quarter)
                data["State"].append(i)
                data["District"].append(district)
                data["Count"].append(count)
                data["Amount"].append(amount)
                data["Average Transaction"].append(average)

# Creating a Data frame
Top_Transaction = pd.DataFrame(data)

# Checking the top 5 rows
Top_Transaction.head()

Unnamed: 0,Year,Quarter,State,District,Count,Amount,Average Transaction
0,2018,Q1,andaman-&-nicobar-islands,south andaman,5688,12560250.0,2208.201361
1,2018,Q1,andaman-&-nicobar-islands,nicobars,528,1139849.0,2158.804548
2,2018,Q1,andaman-&-nicobar-islands,north and middle andaman,442,931663.1,2107.835016
3,2018,Q2,andaman-&-nicobar-islands,south andaman,9395,23948240.0,2549.040502
4,2018,Q2,andaman-&-nicobar-islands,nicobars,1120,3072437.0,2743.247239


In [38]:
# Data Transformation
# Folder-style state keys -> display names (via state_dict); districts -> Title Case
readable_states = Top_Transaction["State"].replace(state_dict)
titled_districts = Top_Transaction["District"].str.title()
Top_Transaction["State"] = readable_states
Top_Transaction["District"] = titled_districts

# Save the cleaned data frame as a CSV file (row index left out)
top_districts_transaction_csv = "Data/top/Top_Districts_Transaction.csv"
Top_Transaction.to_csv(top_districts_transaction_csv, index=False)

# Preview the first 5 rows
Top_Transaction.head(5)

Unnamed: 0,Year,Quarter,State,District,Count,Amount,Average Transaction
0,2018,Q1,Andaman & Nicobar Islands,South Andaman,5688,12560250.0,2208.201361
1,2018,Q1,Andaman & Nicobar Islands,Nicobars,528,1139849.0,2158.804548
2,2018,Q1,Andaman & Nicobar Islands,North And Middle Andaman,442,931663.1,2107.835016
3,2018,Q2,Andaman & Nicobar Islands,South Andaman,9395,23948240.0,2549.040502
4,2018,Q2,Andaman & Nicobar Islands,Nicobars,1120,3072437.0,2743.247239


## Top - Users - States
This data lists the top pincodes and the top districts of each state by registered users
<br/>
- States: States<br/>
- Pincode: Pincode<br/>
- Year : Year<br/>
- Quarter : Quarter<br/>
- Registered Users : Registered Users<br/>

In [39]:
### Top users by pincode
# Reads every <state>/<year>/<quarter>.json file below `path` and flattens the
# entries of data.pincodes into one row per state, pincode, year and quarter
# with the registered-user count.
path = "Phonepe-pulse-master/data/top/user/country/state/"

# creating an empty dictionary to collect all the data
data = {
    "Year": [],
    "Quarter": [],
    "State": [],
    "Pincode": [],
    "Registered Users": [],
}

# path of States (sorted because os.listdir returns entries in arbitrary order)
states = sorted(os.listdir(path))

for i in states:
    p_i = path + i + "/"

    years = sorted(os.listdir(p_i))

    for j in years:
        p_j = p_i + j + "/"

        quarters = sorted(os.listdir(p_j))

        for k in quarters:
            p_k = p_j + k

            # "1.json" -> "Q1". splitext drops only the extension, whereas
            # str.strip(".json") removes any of those characters from both ends.
            quarter = "Q" + str(int(os.path.splitext(k)[0]))

            # the context manager closes the file; a bare open() leaked one
            # handle per JSON file
            with open(p_k, "r") as Data:
                d = json.load(Data)

            # a missing or null section simply contributes no rows
            try:
                records = d["data"]["pincodes"] or []
            except (KeyError, TypeError):
                records = []

            for z in records:
                # read every field before appending, so the column lists can
                # never end up with different lengths
                try:
                    pincode = z["name"]
                    users = z["registeredUsers"]
                except (KeyError, TypeError):
                    # skip only the malformed record, not the rest of the file
                    continue

                # filling the dataset
                data["Year"].append(j)
                data["Quarter"].append(quarter)
                data["State"].append(i)
                data["Pincode"].append(pincode)
                data["Registered Users"].append(users)

# Creating a Data frame
Top_Users = pd.DataFrame(data)

# Checking the top 5 rows
Top_Users.head()
            

Unnamed: 0,Year,Quarter,State,Pincode,Registered Users
0,2018,Q1,andaman-&-nicobar-islands,744103,1608
1,2018,Q1,andaman-&-nicobar-islands,744101,1108
2,2018,Q1,andaman-&-nicobar-islands,744105,1075
3,2018,Q1,andaman-&-nicobar-islands,744102,1006
4,2018,Q1,andaman-&-nicobar-islands,744104,272


In [40]:
# Data Transformation: folder-style state keys -> display names (via state_dict)
readable_states = Top_Users["State"].replace(state_dict)
Top_Users["State"] = readable_states

# Save the cleaned data frame as a CSV file (row index left out)
top_pincode_users_csv = "Data/top/Top_Pincode_Users.csv"
Top_Users.to_csv(top_pincode_users_csv, index=False)

# Preview the first 5 rows
Top_Users.head(5)

Unnamed: 0,Year,Quarter,State,Pincode,Registered Users
0,2018,Q1,Andaman & Nicobar Islands,744103,1608
1,2018,Q1,Andaman & Nicobar Islands,744101,1108
2,2018,Q1,Andaman & Nicobar Islands,744105,1075
3,2018,Q1,Andaman & Nicobar Islands,744102,1006
4,2018,Q1,Andaman & Nicobar Islands,744104,272


In [41]:
### Top users by district
# Reads every <state>/<year>/<quarter>.json file below `path` and flattens the
# entries of data.districts into one row per state, district, year and quarter
# with the registered-user count.
# NOTE: this rebinds Top_Users, replacing the pincode-level frame.
path = "Phonepe-pulse-master/data/top/user/country/state/"

# creating an empty dictionary to collect all the data
data = {
    "Year": [],
    "Quarter": [],
    "State": [],
    "District": [],
    "Registered Users": [],
}

# path of States (sorted because os.listdir returns entries in arbitrary order)
states = sorted(os.listdir(path))

for i in states:
    p_i = path + i + "/"

    years = sorted(os.listdir(p_i))

    for j in years:
        p_j = p_i + j + "/"

        quarters = sorted(os.listdir(p_j))

        for k in quarters:
            p_k = p_j + k

            # "1.json" -> "Q1". splitext drops only the extension, whereas
            # str.strip(".json") removes any of those characters from both ends.
            quarter = "Q" + str(int(os.path.splitext(k)[0]))

            # the context manager closes the file; a bare open() leaked one
            # handle per JSON file
            with open(p_k, "r") as Data:
                d = json.load(Data)

            # a missing or null section simply contributes no rows
            try:
                records = d["data"]["districts"] or []
            except (KeyError, TypeError):
                records = []

            for z in records:
                # read every field before appending, so the column lists can
                # never end up with different lengths
                try:
                    # same " district" suffix clean-up as the other
                    # district-level loaders in this notebook
                    district = z["name"].replace(" district", "")
                    users = z["registeredUsers"]
                except (KeyError, TypeError, AttributeError):
                    # skip only the malformed record, not the rest of the file
                    continue

                # filling the dataset
                data["Year"].append(j)
                data["Quarter"].append(quarter)
                data["State"].append(i)
                data["District"].append(district)
                data["Registered Users"].append(users)

# Creating a Data frame
Top_Users = pd.DataFrame(data)

# Checking the top 5 rows
Top_Users.head()
            

Unnamed: 0,Year,Quarter,State,District,Registered Users
0,2018,Q1,andaman-&-nicobar-islands,south andaman,5846
1,2018,Q1,andaman-&-nicobar-islands,north and middle andaman,632
2,2018,Q1,andaman-&-nicobar-islands,nicobars,262
3,2018,Q2,andaman-&-nicobar-islands,south andaman,8143
4,2018,Q2,andaman-&-nicobar-islands,north and middle andaman,911


In [42]:
# Data Transformation
# Folder-style state keys -> display names (via state_dict); districts -> Title Case
readable_states = Top_Users["State"].replace(state_dict)
titled_districts = Top_Users["District"].str.title()
Top_Users["State"] = readable_states
Top_Users["District"] = titled_districts

# Save the cleaned data frame as a CSV file (row index left out)
top_districts_users_csv = "Data/top/Top_Districts_Users.csv"
Top_Users.to_csv(top_districts_users_csv, index=False)

# Preview the first 5 rows
Top_Users.head(5)

Unnamed: 0,Year,Quarter,State,District,Registered Users
0,2018,Q1,Andaman & Nicobar Islands,South Andaman,5846
1,2018,Q1,Andaman & Nicobar Islands,North And Middle Andaman,632
2,2018,Q1,Andaman & Nicobar Islands,Nicobars,262
3,2018,Q2,Andaman & Nicobar Islands,South Andaman,8143
4,2018,Q2,Andaman & Nicobar Islands,North And Middle Andaman,911
