## WORKING WITH PDF FILES IN PYTHON USING PDF PLUMBER MODULE IN PYTHON 

In [1]:
import pandas as pd
import numpy as np
import pdfplumber
import logging

## Configuring logging module

In [2]:
logging.basicConfig(
    filename='Working_with_pdfs_logs.log',
    level=logging.DEBUG,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'    
)

## Defining the Logger

In [3]:
logger = logging.getLogger('working_with_pdf_logs')

## Testing Logging Module

In [5]:

try:
    if 4 > 3:
        print('2 is greater than 3')
        logger.info('code inside the try block Executed without any Errors......')
except Exception as e:
    logger.error(f"Error while Executing the code {e}")
finally:
    logger.info("code Execution is completed..")

2 is greater than 3


## Working with pdf files

In [10]:
# Open the PDF file
try:
    with pdfplumber.open('Practice_Tables_with_pdf.pdf') as pdf:
        logger.info('succesfully Opened Succesfully.........')
        # Getting Total Number of pdf pages 
        Total_number_of_pages = len(pdf.pages)
        print(f"there are Total {Total_number_of_pages} pages in Opened Pdf file.")

        # Navigating through pages 
        pdf_pages = pdf.pages

        page_1 = pdf_pages[0]  # Access the First page (0-indexed)

        page_1_content = page_1.extract_text()  # Extract text from the page
        print('\n',page_1_content)

        #Extracting Tables in pdf pages
        page_2 = pdf_pages[1] # Access the Secong page (0-indexed)

        #Extracting Tables from the pdf page
        Tables_in_pdf_page_2 = page_2.extract_table()
        
        # Convert table to DataFrame

        # Creating Table header 
        header = Tables_in_pdf_page_2[0]
        data = Tables_in_pdf_page_2[1:]

        # creating dataframe 
        df = pd.DataFrame(data=data, columns=header)  # Skip the header row
        print(df.head())

except Exception as e:
    logger.error(f"Error while Execution of the code present in the Try Block")
    print(e)
finally:
    logger.info('Code Execution in completed Thank you')

there are Total 2 pages in Opened Pdf file.

 Details of Tables
in the PDF File This PDF contains 2 pages:
1. Page 1 (this page) contains details about the tables present in the PDF.
2. Page 2 contains a table of Employee Details.
Both tables are designed for practice with data frames, including operations like grouping,
filtering, and aggregations
Confidential
  EMP_ID       EMP_NAME     DEPARTMENT SALARY JOINING_DATE
0   E101   MADHU SUDHAN  DATA ENGINEER  50000   2022-01-15
1   E102    AJAY PRASAD                 62000   2020-01-15
2   E103     OM PRAKASH      SIBEL CRM  79000   2020-01-15
3   E104   MAHESH REGAR            OBI  45000   2020-01-16
4   E105  GOPI KRISHNAN            DBA  39700   2020-01-17


In [11]:
df.head()

Unnamed: 0,EMP_ID,EMP_NAME,DEPARTMENT,SALARY,JOINING_DATE
0,E101,MADHU SUDHAN,DATA ENGINEER,50000,2022-01-15
1,E102,AJAY PRASAD,,62000,2020-01-15
2,E103,OM PRAKASH,SIBEL CRM,79000,2020-01-15
3,E104,MAHESH REGAR,OBI,45000,2020-01-16
4,E105,GOPI KRISHNAN,DBA,39700,2020-01-17


In [20]:
# df[df['DEPARTMENT'] == '']
df.loc[df['DEPARTMENT'] == '', 'DEPARTMENT'] = 'MISSING'

In [21]:
df

Unnamed: 0,EMP_ID,EMP_NAME,DEPARTMENT,SALARY,JOINING_DATE
0,E101,MADHU SUDHAN,DATA ENGINEER,50000,2022-01-15
1,E102,AJAY PRASAD,MISSING,62000,2020-01-15
2,E103,OM PRAKASH,SIBEL CRM,79000,2020-01-15
3,E104,MAHESH REGAR,OBI,45000,2020-01-16
4,E105,GOPI KRISHNAN,DBA,39700,2020-01-17
5,E106,BIPIN KADUKURI,DBA,53003,2020-01-18
6,E107,ASHISH SHARMA,SIBEL CRM,49087,2020-01-19


## Working with Complex pdf files

In [23]:
try:
    pdf_file_name = "Daily_Sales_Report_02_10_2024.pdf"
    with pdfplumber.open(pdf_file_name) as pdf:
        Total_number_of_pages = len(pdf.pages)
        print(f'\nTotal number of pages are {Total_number_of_pages}')

except Exception as e:
    print(e)

finally:
    print(f"Code Execution is Completed Please verify the result if is there any errors.......")


Total number of pages are 3
Code Execution is Completed Please verify the result if is there any errors.......


## Finding pages that contains tables 

In [25]:
pages_with_tables = []
with pdfplumber.open(pdf_file_name) as pdf:
    for i,page in enumerate(pdf.pages):
        table = page.extract_table()
        if table:
            pages_with_tables.append(i + 1)

    
if pages_with_tables:
    print(f"Pages that contain Tables are {pages_with_tables}")

Pages that contain Tables are [2, 3]


## Finding Number of Tables

In [28]:
with pdfplumber.open(pdf_file_name) as pdf:
    find_page_2_tables = pdf.pages[1].find_tables()
    # page_2_tables = pdf.pages[1].extract_tables()
    # page_3_tables = pdf.pages[2].extract_tables()

## Finding number of tables 
if find_page_2_tables:
    for table in find_page_2_tables:
        print(table)


print(f"number of tables are {len(find_page_2_tables)}")

<pdfplumber.table.Table object at 0x0000021BF56BDAD0>
<pdfplumber.table.Table object at 0x0000021BF55863D0>
<pdfplumber.table.Table object at 0x0000021BF5586790>
<pdfplumber.table.Table object at 0x0000021BF5587F50>
<pdfplumber.table.Table object at 0x0000021BF5584350>
number of tables are 5


## Creating Raw Dataframes from pdf page table to analyse and clean the data

In [29]:
dataframes = []
for table in find_page_2_tables:
    table_data = table.extract()
    df = pd.DataFrame(table_data[1:],columns=table_data[0])
    dataframes.append(df)

## Finding Number of dataframes created

In [30]:
print(len(dataframes))

5


In [31]:
for df in dataframes:
    print(display(df.head(5)))

Unnamed: 0,Unnamed: 1,Unnamed: 2,Current Month,None,None.1,None.2,None.3,None.4,Last Month,None.5,None.6,Last Year Same Month,None.7,None.8
0,,Model,Business Plan,Target,DAY,MTD,Achv%,Contr%,MTD,Growth%,Month Closing,MTD,Growth%,Month Closing
1,Enquiry,Grand i10 NIOS,,0,1275,1275,0%,9%,948,34.0%,50093,742,72.0%,60172
2,,All New i20,,0,899,899,0%,6%,780,15.0%,35826,500,80.0%,37262
3,,i20 N Line,,0,114,114,0%,1%,185,-38.0%,5414,79,44.0%,4913
4,,Aura,,0,1093,1093,0%,8%,989,11.0%,40186,493,122.0%,33081


None


Unnamed: 0,Booking,Grand i10 NIOS,Unnamed: 3,0,77,77.1,0%,10%,41,87.8%,6832,42,83.3%,9748
0,,All New i20,,0,60,60,0%,8%,42,42.9%,5488,45,33.3%,7837
1,,i20 N Line,,0,5,5,0%,1%,4,25.0%,472,6,-16.7%,757
2,,Aura,,0,80,80,0%,10%,47,70.2%,5983,53,50.9%,7211
3,,New Verna,,0,21,21,0%,3%,16,31.3%,1910,18,16.7%,3675
4,,Exter,,0,115,115,0%,15%,57,101.8%,9509,84,36.9%,10887


None


Unnamed: 0,Retail,Grand i10 NIOS,7085,0,7,7.1,0%,26%,0.1,0.0%,4713,4,75.0%,6444
0,,All New i20,6450,0,3,3,0%,11%,0,0.0%,3872,1,200.0%,5432
1,,i20 N Line,400,0,0,0,0%,0%,0,0.0%,320,0,0.0%,552
2,,Aura,4200,0,2,2,0%,7%,0,0.0%,3971,3,-33.3%,4297
3,,New Verna,2500,0,0,0,0%,0%,0,0.0%,1233,3,-100.0%,2415
4,,Exter,8600,0,0,0,0%,0%,0,0.0%,6376,1,-100.0%,7872


None


Unnamed: 0,Order,Grand i10 NIOS,7035,0,32,32.1,0%,6%,3,966.7%,5009,0.1,0.0%,6871
0,,All New i20,6300,0,17,17,0%,3%,2,750.0%,4022,0,0.0%,6613
1,,i20 N Line,300,0,1,1,0%,0%,0,0.0%,323,0,0.0%,344
2,,Aura,4500,0,21,21,0%,4%,1,2000.0%,4369,0,0.0%,4308
3,,New Verna,2350,0,7,7,0%,1%,1,600.0%,1194,0,0.0%,2343
4,,Exter,7300,0,23,23,0%,4%,8,187.5%,6776,0,0.0%,8301


None


Unnamed: 0,Wholesale,Grand i10 NIOS,7035,0,23,23.1,0%,5%,82,-72.0%,5103,79,-70.9%,6552
0,,All New i20,6300,0,7,7,0%,2%,64,-89.1%,4106,302,-97.7%,6772
1,,i20 N Line,300,0,2,2,0%,0%,0,0.0%,322,4,-50.0%,440
2,,Aura,4500,0,19,19,0%,5%,94,-79.8%,4462,19,0.0%,4096
3,,New Verna,2350,0,7,7,0%,2%,3,133.3%,1198,97,-92.8%,2313
4,,Exter,7300,0,21,21,0%,5%,111,-81.1%,6908,219,-90.4%,8097


None


## First Dataframe

In [32]:
df_1 = dataframes[0]
df_1

Unnamed: 0,Unnamed: 1,Unnamed: 2,Current Month,None,None.1,None.2,None.3,None.4,Last Month,None.5,None.6,Last Year Same Month,None.7,None.8
0,,Model,Business Plan,Target,DAY,MTD,Achv%,Contr%,MTD,Growth%,Month Closing,MTD,Growth%,Month Closing
1,Enquiry,Grand i10 NIOS,,0,1275,1275,0%,9%,948,34.0%,50093,742,72.0%,60172
2,,All New i20,,0,899,899,0%,6%,780,15.0%,35826,500,80.0%,37262
3,,i20 N Line,,0,114,114,0%,1%,185,-38.0%,5414,79,44.0%,4913
4,,Aura,,0,1093,1093,0%,8%,989,11.0%,40186,493,122.0%,33081
5,,New Verna,,0,780,780,0%,6%,613,27.0%,24902,321,143.0%,23157
6,,Exter,,0,3009,3009,0%,22%,1413,113.0%,70968,1520,98.0%,72174
7,,Venue,,0,1979,1979,0%,14%,1412,40.0%,80440,1006,97.0%,66759
8,,Venue N Line,,0,81,81,0%,1%,186,-56.0%,4885,45,80.0%,3174
9,,Creta,,0,2404,2404,0%,17%,1833,31.0%,98275,1200,100.0%,80241


## second dataframe

In [34]:
df_2 = dataframes[1]
df_2

Unnamed: 0,Booking,Grand i10 NIOS,Unnamed: 3,0,77,77.1,0%,10%,41,87.8%,6832,42,83.3%,9748
0,,All New i20,,0,60,60,0%,8%,42,42.9%,5488,45,33.3%,7837
1,,i20 N Line,,0,5,5,0%,1%,4,25.0%,472,6,-16.7%,757
2,,Aura,,0,80,80,0%,10%,47,70.2%,5983,53,50.9%,7211
3,,New Verna,,0,21,21,0%,3%,16,31.3%,1910,18,16.7%,3675
4,,Exter,,0,115,115,0%,15%,57,101.8%,9509,84,36.9%,10887
5,,Venue,,0,160,160,0%,20%,71,125.4%,13972,65,146.2%,15101
6,,Venue N Line,,0,0,0,0%,0%,5,-100.0%,353,4,-100.0%,574
7,,Creta,,0,224,224,0%,28%,119,88.2%,19873,100,124.0%,19836
8,,Creta N Line,,0,4,4,0%,1%,6,-33.3%,549,0,0.0%,0
9,,Alcazar,,0,42,42,0%,5%,31,35.5%,4406,19,121.1%,2482


## Third dataframe

In [35]:
df_3 = dataframes[2]
df_3

Unnamed: 0,Retail,Grand i10 NIOS,7085,0,7,7.1,0%,26%,0.1,0.0%,4713,4,75.0%,6444
0,,All New i20,6450,0,3,3,0%,11%,0,0.0%,3872,1,200.0%,5432
1,,i20 N Line,400,0,0,0,0%,0%,0,0.0%,320,0,0.0%,552
2,,Aura,4200,0,2,2,0%,7%,0,0.0%,3971,3,-33.3%,4297
3,,New Verna,2500,0,0,0,0%,0%,0,0.0%,1233,3,-100.0%,2415
4,,Exter,8600,0,0,0,0%,0%,0,0.0%,6376,1,-100.0%,7872
5,,Venue,9650,0,5,5,0%,19%,0,0.0%,9406,10,-50.0%,10715
6,,Venue N Line,450,0,0,0,0%,0%,0,0.0%,259,0,0.0%,403
7,,Creta,14600,0,7,7,0%,26%,0,0.0%,13904,17,-58.8%,13437
8,,Creta N Line,800,0,0,0,0%,0%,0,0.0%,431,0,0.0%,0
9,,Alcazar,2400,0,3,3,0%,11%,0,0.0%,2375,2,50.0%,1674


## Fourth Dataframe

In [36]:
df_4 = dataframes[3]
df_4

Unnamed: 0,Order,Grand i10 NIOS,7035,0,32,32.1,0%,6%,3,966.7%,5009,0.1,0.0%,6871
0,,All New i20,6300,0,17,17,0%,3%,2,750.0%,4022,0,0.0%,6613
1,,i20 N Line,300,0,1,1,0%,0%,0,0.0%,323,0,0.0%,344
2,,Aura,4500,0,21,21,0%,4%,1,2000.0%,4369,0,0.0%,4308
3,,New Verna,2350,0,7,7,0%,1%,1,600.0%,1194,0,0.0%,2343
4,,Exter,7300,0,23,23,0%,4%,8,187.5%,6776,0,0.0%,8301
5,,Venue,9900,0,73,73,0%,13%,3,2333.3%,9914,0,0.0%,11383
6,,Venue N Line,350,0,0,0,0%,0%,1,-100.0%,209,0,0.0%,388
7,,Creta,13700,0,343,343,0%,63%,20,1615.0%,15470,0,0.0%,13282
8,,Creta N Line,700,0,1,1,0%,0%,2,-50.0%,445,0,0.0%,0
9,,Alcazar,2600,0,28,28,0%,5%,0,0.0%,2483,0,0.0%,2051


## Fifth Dataframe

In [37]:
df_5 = dataframes[4]
df_5

Unnamed: 0,Wholesale,Grand i10 NIOS,7035,0,23,23.1,0%,5%,82,-72.0%,5103,79,-70.9%,6552
0,,All New i20,6300,0,7,7,0%,2%,64,-89.1%,4106,302,-97.7%,6772
1,,i20 N Line,300,0,2,2,0%,0%,0,0.0%,322,4,-50.0%,440
2,,Aura,4500,0,19,19,0%,5%,94,-79.8%,4462,19,0.0%,4096
3,,New Verna,2350,0,7,7,0%,2%,3,133.3%,1198,97,-92.8%,2313
4,,Exter,7300,0,21,21,0%,5%,111,-81.1%,6908,219,-90.4%,8097
5,,Venue,9900,0,34,34,0%,8%,115,-70.4%,10049,191,-82.2%,11195
6,,Venue N Line,350,0,0,0,0%,0%,0,0.0%,210,0,0.0%,386
7,,Creta,13700,0,283,283,0%,67%,22,1186.4%,15461,248,14.1%,13077
8,,Creta N Line,700,0,1,1,0%,0%,0,0.0%,441,0,0.0%,0
9,,Alcazar,2600,0,23,23,0%,5%,188,-87.8%,2712,73,-68.5%,1837


## Analysing The dataframes and taking Actions according to requirements 

- in the First Dataframe we can see the header is not real header so we need to change that part 
- in remaining all the dataframes data is directly starting without any header so we need to use first dataframe header remaining dataframes

## Now we will Achieve Required Table step by step 

## Let's deep dive into First Dataframe
- Let's Try to Clean it and see how it looks then we can make other dataframes clean

In [40]:
df_1

Unnamed: 0,Unnamed: 1,Unnamed: 2,Current Month,None,None.1,None.2,None.3,None.4,Last Month,None.5,None.6,Last Year Same Month,None.7,None.8
0,,Model,Business Plan,Target,DAY,MTD,Achv%,Contr%,MTD,Growth%,Month Closing,MTD,Growth%,Month Closing
1,Enquiry,Grand i10 NIOS,,0,1275,1275,0%,9%,948,34.0%,50093,742,72.0%,60172
2,,All New i20,,0,899,899,0%,6%,780,15.0%,35826,500,80.0%,37262
3,,i20 N Line,,0,114,114,0%,1%,185,-38.0%,5414,79,44.0%,4913
4,,Aura,,0,1093,1093,0%,8%,989,11.0%,40186,493,122.0%,33081
5,,New Verna,,0,780,780,0%,6%,613,27.0%,24902,321,143.0%,23157
6,,Exter,,0,3009,3009,0%,22%,1413,113.0%,70968,1520,98.0%,72174
7,,Venue,,0,1979,1979,0%,14%,1412,40.0%,80440,1006,97.0%,66759
8,,Venue N Line,,0,81,81,0%,1%,186,-56.0%,4885,45,80.0%,3174
9,,Creta,,0,2404,2404,0%,17%,1833,31.0%,98275,1200,100.0%,80241


1. Firstly we need to create a header so that it can be used for all other dataframes 
2. first row is not real columns so need to remove first row 
3. Grand Total is not needed so need need to remove entire row in all dataframes as we can see above datafames all dataframes consists of Grand Total 
4. Business plan column and Target are of no use so need to that columns 

In [48]:
with pdfplumber.open(pdf_file_name) as pdf:
    # Extract tables from page 2
    page_2 = pdf.pages[1]  # Index starts at 0, so page 2 is index 1
    tables = page_2.extract_tables() # Extracting all Tables from page no 2

In [49]:
# Ensure there are tables extracted
if tables:
    HEADERS = tables[0][1] ## As there are five tables we are creating header from 1 st table second row 
    print(HEADERS)
else:
    print("No tables found on page 2.")

['', 'Model', 'Business Plan', 'Target', 'DAY', 'MTD', 'Achv%', 'Contr%', 'MTD', 'Growth%', 'Month Closing', 'MTD', 'Growth%', 'Month Closing']


## According to Above result we can use above  but there is '' value which can be converted as 'CATEGORY_TYPE'

In [51]:
HEADERS = ['CATEGORY_TYPE' if item == ''else item.replace(' ','_').upper().replace('%','') for item in HEADERS]
HEADERS

['CATEGORY_TYPE',
 'MODEL',
 'BUSINESS_PLAN',
 'TARGET',
 'DAY',
 'MTD',
 'ACHV',
 'CONTR',
 'MTD',
 'GROWTH',
 'MONTH_CLOSING',
 'MTD',
 'GROWTH',
 'MONTH_CLOSING']

## Above results seems to be perfect now we are good to go 

In [53]:
df_first_table = pd.DataFrame(tables[0][2:], columns=HEADERS)

In [54]:
df_first_table

Unnamed: 0,CATEGORY_TYPE,MODEL,BUSINESS_PLAN,TARGET,DAY,MTD,ACHV,CONTR,MTD.1,GROWTH,MONTH_CLOSING,MTD.2,GROWTH.1,MONTH_CLOSING.1
0,Enquiry,Grand i10 NIOS,,0,1275,1275,0%,9%,948,34.0%,50093,742,72.0%,60172
1,,All New i20,,0,899,899,0%,6%,780,15.0%,35826,500,80.0%,37262
2,,i20 N Line,,0,114,114,0%,1%,185,-38.0%,5414,79,44.0%,4913
3,,Aura,,0,1093,1093,0%,8%,989,11.0%,40186,493,122.0%,33081
4,,New Verna,,0,780,780,0%,6%,613,27.0%,24902,321,143.0%,23157
5,,Exter,,0,3009,3009,0%,22%,1413,113.0%,70968,1520,98.0%,72174
6,,Venue,,0,1979,1979,0%,14%,1412,40.0%,80440,1006,97.0%,66759
7,,Venue N Line,,0,81,81,0%,1%,186,-56.0%,4885,45,80.0%,3174
8,,Creta,,0,2404,2404,0%,17%,1833,31.0%,98275,1200,100.0%,80241
9,,Creta N Line,,0,195,195,0%,1%,192,2.0%,5754,0,0.0%,5


In [57]:
# List to store the DataFrames
dfs = []
# Function to process each DataFrame

def process_dataframe(df):
    # Step 1: Remove rows with 'Grand Total' in the 'Model' column
    df = df.loc[df['MODEL'] != 'Grand Total']
    
    # Step 2: Update None values in 'CATEGORY_TYPE'        
    # first_value = df['CATEGORY_TYPE'].iloc[0] if df['CATEGORY_TYPE'].notna().any() else None
    first_value = df['CATEGORY_TYPE'].iloc[0] if df['CATEGORY_TYPE'].notna().any() else None

    # print(first_value)
    # df['CATEGORY_TYPE'] = df['CATEGORY_TYPE'].astype(str)
    # df['CATEGORY_TYPE'].fillna(first_value, inplace=True)
    df.loc[:, 'CATEGORY_TYPE'] = df['CATEGORY_TYPE'].fillna(first_value)
    # df['CATEGORY_TYPE'].fillna(first_value, inplace=True)

    # Step 3: Remove the 'Business Plan' column if it exists
    # columns_to_be_removed = ['BUSINESS_PLAN','TARGET']
    if 'BUSINESS_PLAN' in df.columns:
        df.drop(columns=['BUSINESS_PLAN'], inplace=True)
    # print(df)
    return df

df_first_table = process_dataframe(df_first_table)
dfs.append(df_first_table)

for i in range(1, len(tables)):
    # Convert the table to a DataFrame and apply headers from the first table
    df = pd.DataFrame(tables[i], columns=HEADERS)    
    # Process the DataFrame
    df = process_dataframe(df)
    
    # Append to DataFrame list
    dfs.append(df)
    
    # Save each processed table to CSV
    # csv_path = f"page_2_table_{i+1}.csv"
    # df.to_csv(csv_path, index=False)
    # print(f"Table {i+1} saved to: {csv_path}")



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop(columns=['BUSINESS_PLAN'], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop(columns=['BUSINESS_PLAN'], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop(columns=['BUSINESS_PLAN'], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop(columns=['BUS

In [61]:
for i in dfs:
    print(display(i.head()))

Unnamed: 0,CATEGORY_TYPE,MODEL,TARGET,DAY,MTD,ACHV,CONTR,MTD.1,GROWTH,MONTH_CLOSING,MTD.2,GROWTH.1,MONTH_CLOSING.1
0,Enquiry,Grand i10 NIOS,0,1275,1275,0%,9%,948,34.0%,50093,742,72.0%,60172
1,Enquiry,All New i20,0,899,899,0%,6%,780,15.0%,35826,500,80.0%,37262
2,Enquiry,i20 N Line,0,114,114,0%,1%,185,-38.0%,5414,79,44.0%,4913
3,Enquiry,Aura,0,1093,1093,0%,8%,989,11.0%,40186,493,122.0%,33081
4,Enquiry,New Verna,0,780,780,0%,6%,613,27.0%,24902,321,143.0%,23157


None


Unnamed: 0,CATEGORY_TYPE,MODEL,TARGET,DAY,MTD,ACHV,CONTR,MTD.1,GROWTH,MONTH_CLOSING,MTD.2,GROWTH.1,MONTH_CLOSING.1
0,Booking,Grand i10 NIOS,0,77,77,0%,10%,41,87.8%,6832,42,83.3%,9748
1,Booking,All New i20,0,60,60,0%,8%,42,42.9%,5488,45,33.3%,7837
2,Booking,i20 N Line,0,5,5,0%,1%,4,25.0%,472,6,-16.7%,757
3,Booking,Aura,0,80,80,0%,10%,47,70.2%,5983,53,50.9%,7211
4,Booking,New Verna,0,21,21,0%,3%,16,31.3%,1910,18,16.7%,3675


None


Unnamed: 0,CATEGORY_TYPE,MODEL,TARGET,DAY,MTD,ACHV,CONTR,MTD.1,GROWTH,MONTH_CLOSING,MTD.2,GROWTH.1,MONTH_CLOSING.1
0,Retail,Grand i10 NIOS,0,7,7,0%,26%,0,0.0%,4713,4,75.0%,6444
1,Retail,All New i20,0,3,3,0%,11%,0,0.0%,3872,1,200.0%,5432
2,Retail,i20 N Line,0,0,0,0%,0%,0,0.0%,320,0,0.0%,552
3,Retail,Aura,0,2,2,0%,7%,0,0.0%,3971,3,-33.3%,4297
4,Retail,New Verna,0,0,0,0%,0%,0,0.0%,1233,3,-100.0%,2415


None


Unnamed: 0,CATEGORY_TYPE,MODEL,TARGET,DAY,MTD,ACHV,CONTR,MTD.1,GROWTH,MONTH_CLOSING,MTD.2,GROWTH.1,MONTH_CLOSING.1
0,Order,Grand i10 NIOS,0,32,32,0%,6%,3,966.7%,5009,0,0.0%,6871
1,Order,All New i20,0,17,17,0%,3%,2,750.0%,4022,0,0.0%,6613
2,Order,i20 N Line,0,1,1,0%,0%,0,0.0%,323,0,0.0%,344
3,Order,Aura,0,21,21,0%,4%,1,2000.0%,4369,0,0.0%,4308
4,Order,New Verna,0,7,7,0%,1%,1,600.0%,1194,0,0.0%,2343


None


Unnamed: 0,CATEGORY_TYPE,MODEL,TARGET,DAY,MTD,ACHV,CONTR,MTD.1,GROWTH,MONTH_CLOSING,MTD.2,GROWTH.1,MONTH_CLOSING.1
0,Wholesale,Grand i10 NIOS,0,23,23,0%,5%,82,-72.0%,5103,79,-70.9%,6552
1,Wholesale,All New i20,0,7,7,0%,2%,64,-89.1%,4106,302,-97.7%,6772
2,Wholesale,i20 N Line,0,2,2,0%,0%,0,0.0%,322,4,-50.0%,440
3,Wholesale,Aura,0,19,19,0%,5%,94,-79.8%,4462,19,0.0%,4096
4,Wholesale,New Verna,0,7,7,0%,2%,3,133.3%,1198,97,-92.8%,2313


None


In [64]:
FINAL_DF = pd.concat(dfs,ignore_index=True)

In [66]:
FINAL_DF

Unnamed: 0,CATEGORY_TYPE,MODEL,TARGET,DAY,MTD,ACHV,CONTR,MTD.1,GROWTH,MONTH_CLOSING,MTD.2,GROWTH.1,MONTH_CLOSING.1
0,Enquiry,Grand i10 NIOS,0,1275,1275,0%,9%,948,34.0%,50093,742,72.0%,60172
1,Enquiry,All New i20,0,899,899,0%,6%,780,15.0%,35826,500,80.0%,37262
2,Enquiry,i20 N Line,0,114,114,0%,1%,185,-38.0%,5414,79,44.0%,4913
3,Enquiry,Aura,0,1093,1093,0%,8%,989,11.0%,40186,493,122.0%,33081
4,Enquiry,New Verna,0,780,780,0%,6%,613,27.0%,24902,321,143.0%,23157
...,...,...,...,...,...,...,...,...,...,...,...,...,...
67,Wholesale,Alcazar,0,23,23,0%,5%,188,-87.8%,2712,73,-68.5%,1837
68,Wholesale,New Tucson,0,2,2,0%,0%,0,0.0%,98,0,0.0%,202
69,Wholesale,Kona EV,0,0,0,0%,0%,0,0.0%,0,0,0.0%,44
70,Wholesale,Ioniq 5,0,0,0,0%,0%,0,0.0%,31,0,0.0%,117
