### Import Pandas and Numpy Libraries

In [3]:
# Import pandas and numpy libraries

import pandas as pd
import numpy as np

### Create a DataFrame from the stored Dictionary 

In [7]:
cars_per_cap = [809, 731, 588, 18, 200, 70, 45]
country = ['United States', 'Australia', 'Japan', 'India', 'Russia', 'Morocco', 'Egypt']
drives_right = [True, False, False, False, True, True, True]

data = pd.DataFrame({"cars_per_cap" : cars_per_cap, "country": country , "drives_right":drives_right})

In [8]:
data

Unnamed: 0,cars_per_cap,country,drives_right
0,809,United States,True
1,731,Australia,False
2,588,Japan,False
3,18,India,False
4,200,Russia,True
5,70,Morocco,True
6,45,Egypt,True


### Read a CSV file using pandas

In [12]:
cars_df = pd.read_csv("cars.csv") # Read the csv file   

cars_df

Unnamed: 0,USCA,US,United States,809,FALSE
0,ASPAC,AUS,Australia,731.0,True
1,ASPAC,JAP,Japan,588.0,True
2,ASPAC,IN,India,18.0,True
3,ASPAC,RU,Russia,200.0,False
4,LATAM,MOR,Morocco,70.0,False
5,AFR,EG,Egypt,45.0,False
6,EUR,ENG,England,,True


In [15]:
# Remove the header from the csv file by setting the header parameter to None

cars_df = pd.read_csv("cars.csv", header=None) # Read the csv file with header set to None     
cars_df

Unnamed: 0,0,1,2,3,4
0,USCA,US,United States,809.0,False
1,ASPAC,AUS,Australia,731.0,True
2,ASPAC,JAP,Japan,588.0,True
3,ASPAC,IN,India,18.0,True
4,ASPAC,RU,Russia,200.0,False
5,LATAM,MOR,Morocco,70.0,False
6,AFR,EG,Egypt,45.0,False
7,EUR,ENG,England,,True


In [16]:
# assign the column names to the dataframe


cars_df.columns # Display the column names


Index([0, 1, 2, 3, 4], dtype='int64')

In [18]:
cars_df.columns = ['country code', 'region', 'country', 'cars_per_cap', 'drives_right'] # Assign the column names to the dataframe

In [19]:
cars_df.columns

Index(['country code', 'region', 'country', 'cars_per_cap', 'drives_right'], dtype='object')

In [20]:
cars_df

Unnamed: 0,country code,region,country,cars_per_cap,drives_right
0,USCA,US,United States,809.0,False
1,ASPAC,AUS,Australia,731.0,True
2,ASPAC,JAP,Japan,588.0,True
3,ASPAC,IN,India,18.0,True
4,ASPAC,RU,Russia,200.0,False
5,LATAM,MOR,Morocco,70.0,False
6,AFR,EG,Egypt,45.0,False
7,EUR,ENG,England,,True


### Indexing of Columns in python pandas

In [21]:
cars_df.index # Display the index of the dataframe

RangeIndex(start=0, stop=8, step=1)

In [22]:
# Assign the country code as the index of the dataframe

cars_df = pd.read_csv("cars.csv", header=None, index_col=0) # Read the csv file with header set to None and index_col set to 0

In [23]:
cars_df

Unnamed: 0_level_0,1,2,3,4
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
USCA,US,United States,809.0,False
ASPAC,AUS,Australia,731.0,True
ASPAC,JAP,Japan,588.0,True
ASPAC,IN,India,18.0,True
ASPAC,RU,Russia,200.0,False
LATAM,MOR,Morocco,70.0,False
AFR,EG,Egypt,45.0,False
EUR,ENG,England,,True


In [25]:
cars_df.columns = ['region', 'country', 'cars_per_cap', 'drives_right'] # Assign the column names to the dataframe

In [27]:
cars_df.index.name = 'country code' # Assign the index name to the dataframe

In [28]:
cars_df

Unnamed: 0_level_0,region,country,cars_per_cap,drives_right
country code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
USCA,US,United States,809.0,False
ASPAC,AUS,Australia,731.0,True
ASPAC,JAP,Japan,588.0,True
ASPAC,IN,India,18.0,True
ASPAC,RU,Russia,200.0,False
LATAM,MOR,Morocco,70.0,False
AFR,EG,Egypt,45.0,False
EUR,ENG,England,,True


In [31]:
# remove the index name from the dataframe
cars_df.index.name = None # Remove the index name from the dataframe

In [32]:
cars_df

Unnamed: 0,region,country,cars_per_cap,drives_right
USCA,US,United States,809.0,False
ASPAC,AUS,Australia,731.0,True
ASPAC,JAP,Japan,588.0,True
ASPAC,IN,India,18.0,True
ASPAC,RU,Russia,200.0,False
LATAM,MOR,Morocco,70.0,False
AFR,EG,Egypt,45.0,False
EUR,ENG,England,,True


In [33]:
# write the dataframe to a csv file
cars_df.to_csv("cars_new.csv") # Write the dataframe to a csv

In [36]:
new_df = pd.read_csv("cars_new.csv") # Read the csv file

In [38]:
new_df.columns

Index(['Unnamed: 0', 'region', 'country', 'cars_per_cap', 'drives_right'], dtype='object')

In [39]:
new_df.index.name = 'None'

In [41]:
new_df.columns = ['country code', 'region', 'country', 'cars_per_cap', 'drives_right']  

In [42]:
new_df

Unnamed: 0_level_0,country code,region,country,cars_per_cap,drives_right
None,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,USCA,US,United States,809.0,False
1,ASPAC,AUS,Australia,731.0,True
2,ASPAC,JAP,Japan,588.0,True
3,ASPAC,IN,India,18.0,True
4,ASPAC,RU,Russia,200.0,False
5,LATAM,MOR,Morocco,70.0,False
6,AFR,EG,Egypt,45.0,False
7,EUR,ENG,England,,True


In [43]:
new_df.index.delete = 'None'

In [44]:
new_df

Unnamed: 0_level_0,country code,region,country,cars_per_cap,drives_right
None,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,USCA,US,United States,809.0,False
1,ASPAC,AUS,Australia,731.0,True
2,ASPAC,JAP,Japan,588.0,True
3,ASPAC,IN,India,18.0,True
4,ASPAC,RU,Russia,200.0,False
5,LATAM,MOR,Morocco,70.0,False
6,AFR,EG,Egypt,45.0,False
7,EUR,ENG,England,,True


In [45]:
# remove the index None from the dataframe
new_df = pd.read_csv("cars_new.csv", index_col=0) # Read the csv file with index_col set to 0

In [46]:
new_df

Unnamed: 0,region,country,cars_per_cap,drives_right
USCA,US,United States,809.0,False
ASPAC,AUS,Australia,731.0,True
ASPAC,JAP,Japan,588.0,True
ASPAC,IN,India,18.0,True
ASPAC,RU,Russia,200.0,False
LATAM,MOR,Morocco,70.0,False
AFR,EG,Egypt,45.0,False
EUR,ENG,England,,True


In [73]:
df = pd.read_csv("https://media-doselect.s3.amazonaws.com/generic/A08MajL8qN4rq72EpVJbAP1Rw/marks_1.csv") # Read the csv file

In [74]:
df

Unnamed: 0,1|Akshay|Mathematics|50|40|80
0,2|Mahima|English|40|33|83
1,3|Vikas|Mathematics|50|42|84
2,4|Abhinav|English|40|31|78
3,5|Mahima|Science|50|40|80
4,6|Akshay|Science|50|49|98
5,7|Abhinav|Mathematics|50|47|94
6,8|Vikas|Science|50|40|80
7,9|Abhinav|Science|50|47|94
8,10|Vikas|English|40|39|98
9,11|Akshay|English|40|35|88


In [75]:
df.columns

Index(['1|Akshay|Mathematics|50|40|80'], dtype='object')

In [109]:
# make the header as a row in the dataframe
df = pd.read_csv("https://media-doselect.s3.amazonaws.com/generic/A08MajL8qN4rq72EpVJbAP1Rw/marks_1.csv", header=None) # Read the csv file with header set to None

In [110]:
df

Unnamed: 0,0
0,1|Akshay|Mathematics|50|40|80
1,2|Mahima|English|40|33|83
2,3|Vikas|Mathematics|50|42|84
3,4|Abhinav|English|40|31|78
4,5|Mahima|Science|50|40|80
5,6|Akshay|Science|50|49|98
6,7|Abhinav|Mathematics|50|47|94
7,8|Vikas|Science|50|40|80
8,9|Abhinav|Science|50|47|94
9,10|Vikas|English|40|39|98


In [111]:

# Split the single column into multiple columns
df = df[0].str.split('|', expand=True) # Split the single column into multiple columns

# Assign new column names
df.columns = ["S.No", "Name", "Subject", "Maximum Marks", "Marks Obtained", "Percentage"]



In [112]:
df

Unnamed: 0,S.No,Name,Subject,Maximum Marks,Marks Obtained,Percentage
0,1,Akshay,Mathematics,50,40,80
1,2,Mahima,English,40,33,83
2,3,Vikas,Mathematics,50,42,84
3,4,Abhinav,English,40,31,78
4,5,Mahima,Science,50,40,80
5,6,Akshay,Science,50,49,98
6,7,Abhinav,Mathematics,50,47,94
7,8,Vikas,Science,50,40,80
8,9,Abhinav,Science,50,47,94
9,10,Vikas,English,40,39,98


In [115]:
df

Unnamed: 0,S.No,Name,Subject,Maximum Marks,Marks Obtained,Percentage
0,1,Akshay,Mathematics,50,40,80
1,2,Mahima,English,40,33,83
2,3,Vikas,Mathematics,50,42,84
3,4,Abhinav,English,40,31,78
4,5,Mahima,Science,50,40,80
5,6,Akshay,Science,50,49,98
6,7,Abhinav,Mathematics,50,47,94
7,8,Vikas,Science,50,40,80
8,9,Abhinav,Science,50,47,94
9,10,Vikas,English,40,39,98


In [116]:
df.index.name = "S.NO"

In [117]:
df

Unnamed: 0_level_0,S.No,Name,Subject,Maximum Marks,Marks Obtained,Percentage
S.NO,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,1,Akshay,Mathematics,50,40,80
1,2,Mahima,English,40,33,83
2,3,Vikas,Mathematics,50,42,84
3,4,Abhinav,English,40,31,78
4,5,Mahima,Science,50,40,80
5,6,Akshay,Science,50,49,98
6,7,Abhinav,Mathematics,50,47,94
7,8,Vikas,Science,50,40,80
8,9,Abhinav,Science,50,47,94
9,10,Vikas,English,40,39,98


In [120]:
#drop 1st column from the dataframe
df.drop("S.No", axis=1, inplace=True)# Drop the column from the dataframe    

In [121]:
df

Unnamed: 0_level_0,Name,Subject,Maximum Marks,Marks Obtained,Percentage
S.NO,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,Akshay,Mathematics,50,40,80
1,Mahima,English,40,33,83
2,Vikas,Mathematics,50,42,84
3,Abhinav,English,40,31,78
4,Mahima,Science,50,40,80
5,Akshay,Science,50,49,98
6,Abhinav,Mathematics,50,47,94
7,Vikas,Science,50,40,80
8,Abhinav,Science,50,47,94
9,Vikas,English,40,39,98


### Describing the data