In [17]:
import os
import pandas as pd

# Folder where the monthly UPI application CSV files are located.
# The location can be changed without editing the notebook by setting the
# UPI_DATA_DIR environment variable; when it is not set, the original default
# path is used. Note that the save cell at the end of the notebook writes to
# its own hard-coded path and is not affected by this override.
folder_path = os.environ.get("UPI_DATA_DIR", "D:/dataset/UPI/applications")


In [18]:
# Month-name prefix (first three letters of the file name) -> month number.
# Built once here instead of on every loop iteration.
month_numbers = {
    'jan': 1, 'feb': 2, 'mar': 3, 'apr': 4, 'may': 5, 'jun': 6,
    'jul': 7, 'aug': 8, 'sep': 9, 'oct': 10, 'nov': 11, 'dec': 12
}

# One DataFrame per monthly CSV file is collected here
dataframes = []

# Files are visited in alphabetical (not chronological) order
for file_name in sorted(os.listdir(folder_path)):
    if not file_name.endswith(".csv"):
        continue

    # The two characters in front of ".csv" are expected to be a two-digit year.
    # Any other CSV in the folder is not a monthly input file -- most notably
    # the combined output that this notebook later writes into the same
    # folder -- and previously aborted the run with a ValueError from int().
    year_suffix = file_name[-6:-4]
    if not year_suffix.isdecimal():
        print(f"Skipping '{file_name}': no two-digit year before '.csv'")
        continue
    year = 2000 + int(year_suffix)

    # An unrecognised prefix keeps the original fallback value of 0, but is
    # reported so it does not go unnoticed in the combined data.
    month = month_numbers.get(file_name[:3].lower(), 0)
    if month == 0:
        print(f"Warning: could not determine the month for '{file_name}'; using Month=0")

    # Read the CSV file and tag every row with the period taken from its name
    file_path = os.path.join(folder_path, file_name)
    df = pd.read_csv(file_path)
    df['Year'] = year
    df['Month'] = month

    dataframes.append(df)


In [19]:
# Combine all monthly DataFrames into a single DataFrame.
# pd.concat raises a terse "No objects to concatenate" ValueError when given an
# empty list, so fail early with a message that points at the likely cause.
if not dataframes:
    raise ValueError(
        "No monthly CSV files were loaded into 'dataframes'; "
        "check that folder_path points at the folder containing the input files."
    )
combined_df = pd.concat(dataframes, ignore_index=True)

combined_df.head()

Unnamed: 0,Sr.No.,Application Name,Volume Customer(Mn),Value Customer (Cr),Volume B2C (Mn),Value B2C (Cr),Volume B2B(Mn),Value B2B(Cr),Volume (Mn),Value (Cr),Year,Month
0,1,Airtel Payments Bank Apps,5.29,454.64,-,-,-,-,5.3,460.9,2022,4
1,2,Amazon Pay,73.21,6699.57,-,-,-,-,73.21,6699.57,2022,4
2,3,AU Small Finance Bank Apps,0.32,109.89,-,-,-,-,0.32,109.89,2022,4
3,4,Axis Bank Apps,7.19,1213.44,56.84,43.71,0,0,64.03,1257.15,2022,4
4,5,Bajaj Finserv,0.52,38.61,-,-,-,-,0.52,38.61,2022,4


In [20]:
# Move 'Year' and 'Month' forward so they sit at positions 2 and 3
# (i.e. become the 3rd and 4th columns); all other columns keep their order.
columns = combined_df.columns.tolist()
for target_position, label in enumerate(('Year', 'Month'), start=2):
    current_position = columns.index(label)
    columns.insert(target_position, columns.pop(current_position))
combined_df = combined_df[columns]

combined_df.head()

Unnamed: 0,Sr.No.,Application Name,Year,Month,Volume Customer(Mn),Value Customer (Cr),Volume B2C (Mn),Value B2C (Cr),Volume B2B(Mn),Value B2B(Cr),Volume (Mn),Value (Cr)
0,1,Airtel Payments Bank Apps,2022,4,5.29,454.64,-,-,-,-,5.3,460.9
1,2,Amazon Pay,2022,4,73.21,6699.57,-,-,-,-,73.21,6699.57
2,3,AU Small Finance Bank Apps,2022,4,0.32,109.89,-,-,-,-,0.32,109.89
3,4,Axis Bank Apps,2022,4,7.19,1213.44,56.84,43.71,0,0,64.03,1257.15
4,5,Bajaj Finserv,2022,4,0.52,38.61,-,-,-,-,0.52,38.61


In [21]:
# Measure columns that should hold numbers
numeric_columns = [
    "Volume Customer(Mn)",
    "Value Customer (Cr)",
    "Volume B2C (Mn)",
    "Value B2C (Cr)",
    "Volume B2B(Mn)",
    "Value B2B(Cr)",
    "Volume (Mn)",
    "Value (Cr)",
]

# Strip thousands separators, then coerce to numbers; anything that still
# cannot be parsed (such as the '-' placeholders seen in the raw data) becomes NaN.
for column_name in numeric_columns:
    without_commas = combined_df[column_name].replace({',': ''}, regex=True)
    combined_df[column_name] = pd.to_numeric(without_commas, errors='coerce')

# Count the NaN values left in each measure column
invalid_entries = combined_df[numeric_columns].isna().sum()
print("Invalid entries in each column:\n", invalid_entries)

Invalid entries in each column:
 Volume Customer(Mn)      0
Value Customer (Cr)      0
Volume B2C (Mn)        952
Value B2C (Cr)         953
Volume B2B(Mn)         836
Value B2B(Cr)          834
Volume (Mn)              0
Value (Cr)               0
dtype: int64


In [6]:
# Preview the first five rows of the combined frame
combined_df.head(n=5)

Unnamed: 0,Sr.No.,Application Name,Year,Month,Volume Customer(Mn),Value Customer (Cr),Volume B2C (Mn),Value B2C (Cr),Volume B2B(Mn),Value B2B(Cr),Volume (Mn),Value (Cr)
0,1,Airtel Payments Bank Apps,2022,4,5.29,454.64,,,,,5.3,460.9
1,2,Amazon Pay,2022,4,73.21,6699.57,,,,,73.21,6699.57
2,3,AU Small Finance Bank Apps,2022,4,0.32,109.89,,,,,0.32,109.89
3,4,Axis Bank Apps,2022,4,7.19,1213.44,56.84,43.71,0.0,0.0,64.03,1257.15
4,5,Bajaj Finserv,2022,4,0.52,38.61,,,,,0.52,38.61


In [22]:
# List every distinct value of 'Application Name' in sorted order, one per
# line, so that spelling variants of the same application end up side by side.
name_column = combined_df['Application Name']
unique_applications = name_column.unique()
unique_applications.sort()  # sorts the array returned by unique() in place

for application in unique_applications:
    print(application)

AU Small Finance Bank App
AU Small Finance Bank Apps
Aditya Birla Capital Digital
Aditya Birla Capital Digital #
Aditya Birla Capital Digital Limited
Airtel Payments Bank Apps
Akudo & Muvin (Livquik)
Allahabad Bank App
Allahabad Bank Apps
Amazon Pay
Amazon Pay #
Andhra Bank Apps
Au Small Finance Bank Apps
Axis Bank Apps
BHIM
BHIM #
BPAYWALLET
Bajaj Finserv
Bajaj Finserv #
Bajaj Finserv PPI
Bajaj Markets
Bajaj Pay Wallet
Bandhan Bank Apps
Bank of America Apps
Bank of Baroda Apps
Bank of India App
Bank of India Apps
Bank of Maharashtra App
Bank of Maharashtra Apps
Bharat Pe
Bharat Pe #
CBDC
CBDC Apps
Canara Bank App
Canara Bank Apps
Central Bank of India App
Central Bank of India Apps
Cheq
Citi Bank App
Citi Bank Apps
City Union Bank App
City Union Bank Apps
Cointab
Cred
Cred #
DBS Bank Apps
DBS Digibank App
DBS Digibank Apps
DHANI
DIGIKHATA
Dena Bank App
Dena Bank Apps
Deutsche Bank App
Deutsche Bank Apps
Dhani
Dhani #
Dhani PPI
Dhanlaxmi Bank App
Dhanlaxmi Bank Apps
Digikhata
Digikhata

In [23]:
# Name-normalisation table for the 'Application Name' column.
#
# Each entry has the shape [[canonical_name], [alias, alias, ...]]:
#   entry[0][0] is the single name to keep,
#   entry[1]    lists the spellings that should be rewritten to it.
# The replacement cell below walks the entries in list order and replaces
# exact matches of each alias with the canonical name, so the order of the
# entries matters wherever one entry's canonical name is another entry's alias.
app_names = [
[["AU Small Finance Bank App"],["AU Small Finance Bank Apps","Au Small Finance Bank Apps"]],
[["Aditya Birla Capital Digital"],["Aditya Birla Capital Digital #","Aditya Birla Capital Digital Limited"]],
[["Allahabad Bank App"],["Allahabad Bank Apps"]],
[["Amazon Pay"],["Amazon Pay #"]],
[["BHIM"],["BHIM #"]],
[["Bajaj Finserv"],["Bajaj Finserv #","Bajaj Finserv PPI","Bajaj Markets","Bajaj Pay Wallet"]],
[["Bank of India App"],["Bank of India Apps"]],
[["Bank of Maharashtra App"],["Bank of Maharashtra Apps"]],
[["Bharat Pe"],["Bharat Pe #"]],
[["CBDC"],["CBDC Apps"]],
[["Canara Bank App"],["Canara Bank Apps"]],
[["Central Bank of India App"],["Central Bank of India Apps"]],
[["Citi Bank App"],["Citi Bank Apps"]],
[["City Union Bank App"],["City Union Bank Apps"]],
[["Cred"],["Cred #"]],
[["DBS Bank Apps"],["DBS Digibank App","DBS Digibank Apps"]],
[["Dena Bank App"],["Dena Bank Apps"]],
[["Deutsche Bank App"],["Deutsche Bank Apps"]],
[["Dhani"],["Dhani #","Dhani PPI","DHANI"]],
[["Dhanlaxmi Bank App"],["Dhanlaxmi Bank Apps"]],
[["Digikhata"],["Digikhata #","Digikhata PPI","DIGIKHATA"]],
[["Equitas Small Apps"],["Equitas Small Finance Bank Apps"]],
[["EROUTE"],["Eroute"]],
[["FamApp by Trio"],["Fam App by Trio #","FamApp by Trio #","Fampay","FamPay PPI","FamPay by Trio #","Fampay PPI","Fampay by Trio #"]],
[["Fave"],["Fave #"]],
[["Federal Bank App"],["Federal Bank Apps"]],
[["Fino Payments Bank App"],["Fino Payments Bank Apps","Fino Payments bank Apps","Fino Payments bank App"]],
[["Flipkart UPI"],["Flipkart","Flipkart UPI #"]],
[["Freo #"],["Freo Money #"]],
[["GoKiwi"],["Go Kiwi","KIWI","Kiwi","Kiwi #","Gokiwi"]],
[["GoNiyo"],["Go Niyo","GoNiyo PPI"]],
[["Google Pay"],["Google Pay #"]],
[["Groww"],["Groww #"]],
[["HSBC Bank App"],["HSBC Bank Apps"]],
[["IDBI Bank App"],["IDBI Bank Apps"]],
[["IDFC First Bank Apps"],["IDFC Bank App","IDFC Bank Apps"]],
[["INDIE"],["INDIE - Indus Ind","IndusInd Bank App","IndusInd Bank Apps"]],
[["India Post Payments Bank App"],["India Post Payments Bank Apps"]],
[["Indian Bank App"],["Indian Bank Apps"]],
[["Jammu and Kashmir Bank App"],["Jammu and Kashmir Bank Apps"]],
# NOTE(review): the canonical name is spelled "Janta" while the alias is
# spelled "Janata" -- confirm which spelling is intended.
[["Janta Sahakari Bank App"],["Janata Sahakari Bank Apps"]],
[["Jio Payments Bank App"],["Jio Payments Bank Apps"]],
[["Jupiter Money"],["Jupiter","Jupiter Edge (LivQuick PPI App)","Jupiter Edge (LivQuick PPI)","Jupiter Money #","Jupiter Edge"]],
[["Karnataka Bank App"],["Karnataka Bank Apps"]],
[["Karur Vysya Bank App"],["Karur Vysya Bank Apps"]],
[["LivQuick PPI"],["LivQuik Apps #"]],
[["MakeMyTrip"],["MakeMy Trip"]],
[["Mobikwik"],["Mobikwik #","Mobikwik PPI"]],
[["NSDL Payments Bank App"],["NSDL Payments Bank Apps"]],
[["Navi"],["Navi #"]],
[["Niyo Global"],["Niyo Global #"]],
[["OMNI"],["Omni Card","OmniCard","OmniCard #","Omnicard","Omnicard #","Omnicard PPI"]],
[["Others"],["Other Apps","Other"]],
# 'POPClub#' is first rewritten to 'POPClub #' here; the later "Pop Club" entry
# lists 'POPClub #' as an alias, so both spellings end up as 'Pop Club' only
# because the entries are applied in list order.
[["POPClub #"],["POPClub#"]],
[["Paytm"],["Paytm #","PAYTMWALLET","Paytm (OCL ) #","Paytm (OCL)","Paytm (OCL) #","Paytm Payments Bank App"]],
[["PhonePe"],["PhonePe #","Phonepe #"]],
[["Pop Club"],["Pop Club #","POPClub #"]],
[["Punjab National Bank App"],["Punjab National Bank Apps"]],
[["Punjab Sindh Bank App"],["Punjab Sind Bank Apps","Punjab And Sind Bank Apps","Punjab Sindh Bank Apps"]],
[["RBL Bank App"],["RBL Bank Apps","RBLBANK"]],
[["Rapipay"],["Rapi Pay #","Rapi Pay"]],
[["Samsung Pay"],["Samsung Pay #"]],
[["Shriram One"],["SHRIRAM","Shri Ram One #","Shriram","Shriram One #"]],
# 'Slice' also appears in its own alias list; replacing it with itself is a no-op.
[["Slice"],["Slice","slice","slice #","slice PPI","Slice #","SLICE"]],
[["South Indian Bank App"],["South Indian Bank Apps"]],
[["Standard Chartered Bank App"],["Standard Chartered Bank Apps"]],
[["SBI Bank Apps"],["State Bank of India Apps"]],
[["Suryoday"],["Suryoday Bank Apps","SURYODAY"]],
[["TataNeu"],["Tata Neu #","TataNeu #"]],
[["Timepay"],["Timepay #"]],
[["Transerv"],["Transerv PPI"]],
[["UCO Bank App"],["UCO Bank Apps"]],
[["Ultracash"],["Ultracash #"]],
[["Union Bank Apps"],["Union Bank Of India Apps"]],
[["Utkarsh Small Finance Bank"],["Utkarsh Small Finance Bank Apps"]],
[["WhatsApp"],["WhatsApp #","Whatsapp #"]],
]

# Number of canonical-name entries in the table
len(app_names)

76

In [24]:
# Normalise 'Application Name' using the app_names table.
#
# The table is first folded, in list order, into a single alias -> canonical
# lookup. Folding in order reproduces what repeated one-by-one replacement
# does for chained entries (an alias whose target is itself renamed by a later
# entry ends up at the final name), while the column is rewritten only once.
alias_to_canonical = {}
for app in app_names:
    new_name = app[0][0]
    for old_name in app[1]:
        # Names already redirected to old_name must follow it on to new_name
        redirected = [source for source, target in alias_to_canonical.items()
                      if target == old_name]
        for source in redirected:
            alias_to_canonical[source] = new_name
        # A name that was itself renamed earlier no longer occurs under its
        # original spelling, so an existing entry for it is left alone
        alias_to_canonical.setdefault(old_name, new_name)

# Exact-match lookup; names that are not in the table are kept unchanged
original_names = combined_df['Application Name']
normalized_names = original_names.map(lambda name: alias_to_canonical.get(name, name))

# Report only the names that were actually present in the data, with row
# counts, instead of claiming a replacement for every alias in the table.
was_renamed = original_names.notna() & (original_names != normalized_names)
rename_counts = original_names[was_renamed].value_counts().sort_index()
for old_name, row_count in rename_counts.items():
    print(f"Replaced '{old_name}' with '{alias_to_canonical[old_name]}' in {row_count} rows")
print(f"{int(was_renamed.sum())} rows renamed across {len(rename_counts)} distinct names")

combined_df['Application Name'] = normalized_names

Replaced 'AU Small Finance Bank Apps' with 'AU Small Finance Bank App'
Replaced 'Au Small Finance Bank Apps' with 'AU Small Finance Bank App'
Replaced 'Aditya Birla Capital Digital #' with 'Aditya Birla Capital Digital'
Replaced 'Aditya Birla Capital Digital Limited' with 'Aditya Birla Capital Digital'
Replaced 'Allahabad Bank Apps' with 'Allahabad Bank App'
Replaced 'Amazon Pay #' with 'Amazon Pay'
Replaced 'BHIM #' with 'BHIM'
Replaced 'Bajaj Finserv #' with 'Bajaj Finserv'
Replaced 'Bajaj Finserv PPI' with 'Bajaj Finserv'
Replaced 'Bajaj Markets' with 'Bajaj Finserv'
Replaced 'Bajaj Pay Wallet' with 'Bajaj Finserv'
Replaced 'Bank of India Apps' with 'Bank of India App'
Replaced 'Bank of Maharashtra Apps' with 'Bank of Maharashtra App'
Replaced 'Bharat Pe #' with 'Bharat Pe'
Replaced 'CBDC Apps' with 'CBDC'
Replaced 'Canara Bank Apps' with 'Canara Bank App'
Replaced 'Central Bank of India Apps' with 'Central Bank of India App'
Replaced 'Citi Bank Apps' with 'Citi Bank App'
Replaced '

In [25]:
# Re-list the distinct 'Application Name' values in sorted order, one per line,
# to inspect the names that remain after the replacements.
remaining_names = combined_df['Application Name']
unique_applications = remaining_names.unique()
unique_applications.sort()  # sorts the array returned by unique() in place

for application in unique_applications:
    print(application)

AU Small Finance Bank App
Aditya Birla Capital Digital
Airtel Payments Bank Apps
Akudo & Muvin (Livquik)
Allahabad Bank App
Amazon Pay
Andhra Bank Apps
Axis Bank Apps
BHIM
BPAYWALLET
Bajaj Finserv
Bandhan Bank Apps
Bank of America Apps
Bank of Baroda Apps
Bank of India App
Bank of Maharashtra App
Bharat Pe
CBDC
Canara Bank App
Central Bank of India App
Cheq
Citi Bank App
City Union Bank App
Cointab
Cred
DBS Bank Apps
Dena Bank App
Deutsche Bank App
Dhani
Dhanlaxmi Bank App
Digikhata
EROUTE
ESAF Small Finance Bank Apps
Equitas Small Apps
FAM
FIFEDERAL
FamApp by Trio
Fave
Federal Bank App
Fi Money #
Fino Payments Bank App
Finserv Markets
Finshell Pay
Flipkart UPI
Freecharge #
Freo #
GOAXB
Genwise #
GoIbibo
GoKiwi
GoNiyo
Google Pay
Groww
HDFC Bank Apps
HSBC Bank App
ICICI Bank Apps
IDBI Bank App
IDFC First Bank Apps
INDIE
India Post Payments Bank App
Indian Bank App
Indian Overseas Bank Apps
Jammu and Kashmir Bank App
Janta Sahakari Bank App
Jio Payments Bank App
Jupiter Money
Karnataka B

In [26]:
# Preview the first five rows of the combined frame
combined_df.head(n=5)

Unnamed: 0,Sr.No.,Application Name,Year,Month,Volume Customer(Mn),Value Customer (Cr),Volume B2C (Mn),Value B2C (Cr),Volume B2B(Mn),Value B2B(Cr),Volume (Mn),Value (Cr)
0,1,Airtel Payments Bank Apps,2022,4,5.29,454.64,,,,,5.3,460.9
1,2,Amazon Pay,2022,4,73.21,6699.57,,,,,73.21,6699.57
2,3,AU Small Finance Bank App,2022,4,0.32,109.89,,,,,0.32,109.89
3,4,Axis Bank Apps,2022,4,7.19,1213.44,56.84,43.71,0.0,0.0,64.03,1257.15
4,5,Bajaj Finserv,2022,4,0.52,38.61,,,,,0.52,38.61


In [35]:
# Save the combined DataFrame to a new CSV file.
# NOTE: the file is written into the same folder the monthly input files are
# read from.
output_file = "D:/dataset/UPI/applications/combined_upi_data.csv"
combined_df.to_csv(output_file, index=False)

# Drop the leading 'Sr.No.' column from the in-memory frame.
# It is removed by label rather than by position: a positional
# `iloc[:, 1:]` strips one more real column ('Application Name', 'Year', ...)
# every time this cell is re-run, and the next save then writes the truncated
# frame. With errors='ignore' a re-run is a no-op once the column is gone.
# The save still happens before the drop, as before, so on a fresh run the
# written CSV includes 'Sr.No.'.
combined_df = combined_df.drop(columns=['Sr.No.'], errors='ignore')

print(f"Combined CSV file saved as: {output_file}")

Combined CSV file saved as: D:/dataset/UPI/applications/combined_upi_data.csv
