Roadmap that I'm following: https://medium.com/@ayush-thakur02/a-mini-roadmap-to-learn-pandas-library-in-a-week-417741ef3564

# Day 1: Introduction to Pandas

In [20]:
# pandas - open-source data analysis and manipulation library
 # name “Pandas” is derived from "Panel Data"

# two primary data structures:
# Series: A one-dimensional labeled array.
# DataFrame: A two-dimensional labeled table (like an Excel spreadsheet or SQL table).



In [1]:
import pandas as pd

# load csv file
df = pd.read_csv("continent.csv")

# view first few rows 
print(df.head())

# summary statistics
print(df.describe())



    Continent
0      Africa
1  Antarctica
2        Asia
3   Australia
4      Europe
       Continent
count          7
unique         7
top       Africa
freq           1


In [48]:
# Creating a Series
# From a list:
data = [10, 20, 30, 40]
series = pd.Series(data)
print(series)


# From a dictionary:
data = {'a': 100, 'b': 200, 'c': 300}
series = pd.Series(data)
print(series)


0    10
1    20
2    30
3    40
dtype: int64
a    100
b    200
c    300
dtype: int64


In [77]:
# Creating a DataFrame
# From a dictionary of lists:
data = {
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Age': [25, 30, 35]
}
df = pd.DataFrame(data)
print(df)

 # From a list of dictionaries:
data = [
    {'Name': 'Alice', 'Age': 25},
    {'Name': 'Bob', 'Age': 30}
]
df = pd.DataFrame(data)
print(df)

# From a CSV file:
df = pd.read_csv('continent.csv')


      Name  Age
0    Alice   25
1      Bob   30
2  Charlie   35
    Name  Age
0  Alice   25
1    Bob   30


In [64]:
# Accessing in Series
print(series['a'])
print(df.loc[0])           # By index label
print(df.iloc[1])          # By index position


100
Continent    Africa
Name: 0, dtype: object
Continent    Antarctica
Name: 1, dtype: object


In [89]:
# Modifying DataFrame
# Change value in specific cell
df.loc['Africa'] = 'Bangladesh'

print(df)


            Continent      Africa
0              Africa       india
1          Antarctica       india
2                Asia       india
3           Australia       india
4              Europe       india
5       North America       india
6       South America       india
Africa     Bangladesh  Bangladesh


# Day 2: Reading and Writing Data

In [55]:
# read and write CSV files using Pandas
df = pd.read_csv("WorldPopulation2023.csv")
pd.set_option('display.max_columns',5)
print(df.head())

print(df.describe())

   Rank         Country  ...  UrbanPop% WorldShare
0    36     Afghanistan  ...       26 %     0.53 %
1   138         Albania  ...       67 %     0.04 %
2    34         Algeria  ...       75 %     0.57 %
3   212  American Samoa  ...       N.A.     0.00 %
4   202         Andorra  ...       85 %     0.00 %

[5 rows x 12 columns]
             Rank  Population2023  ...   Fert.Rate   MedianAge
count  234.000000    2.340000e+02  ...  233.000000  233.000000
mean   117.500000    3.437565e+07  ...    2.414163   31.309013
std     67.694165    1.373861e+08  ...    1.155913    9.628386
min      1.000000    5.180000e+02  ...    0.800000   15.000000
25%     59.250000    4.696482e+05  ...    1.600000   22.000000
50%    117.500000    5.643895e+06  ...    2.000000   32.000000
75%    175.750000    2.324537e+07  ...    3.000000   40.000000
max    234.000000    1.428628e+09  ...    6.700000   54.000000

[8 rows x 8 columns]


In [101]:
filt = (df['Country'].str.startswith('B'))
# print(filt)
filtered_df = df.loc[filt]
print(filtered_df)

    Rank                 Country  ...  UrbanPop% WorldShare
14   177                 Bahamas  ...       85 %     0.01 %
15   154                 Bahrain  ...       N.A.     0.02 %
16     8              Bangladesh  ...       41 %     2.15 %
17   187                Barbados  ...       32 %     0.00 %
18    97                 Belarus  ...       80 %     0.12 %
19    82                 Belgium  ...       99 %     0.15 %
20   178                  Belize  ...       48 %     0.01 %
21    77                   Benin  ...       48 %     0.17 %
22   205                 Bermuda  ...       94 %     0.00 %
23   165                  Bhutan  ...       49 %     0.01 %
24    80                 Bolivia  ...       69 %     0.15 %
25   137  Bosnia and Herzegovina  ...       54 %     0.04 %
26   144                Botswana  ...       69 %     0.03 %
27     7                  Brazil  ...       88 %     2.69 %
28   219  British Virgin Islands  ...       53 %     0.00 %
29   176                 Brunei   ...   

In [113]:
# export this dataframe into new csv file
filtered_df.to_csv('data/filtered_world_population_2023.csv')
filtered_df.to_csv('data/filtered_world_population_2023.tsv', sep='\t')


In [None]:
# read and write JSON files using Pandas


In [125]:
# read and write Excel files using Pandas
df = pd.read_excel('data/file_example_XLSX_1000.xlsx')
print(df.head())

filtered_df.to_excel('data/mod.xlsx')

   Unnamed: 0 First Name  ...        Date    Id
0           1      Dulce  ...  15/10/2017  1562
1           2       Mara  ...  16/08/2016  1582
2           3     Philip  ...  21/05/2015  2587
3           4   Kathleen  ...  15/10/2017  3549
4           5    Nereida  ...  16/08/2016  2468

[5 rows x 8 columns]


In [None]:
# read and write SQL databases using Pandas

In [None]:
 # handle missing values, duplicates, and data types using Pandas


# Day 3: Analysing Data


# Day 4: Manipulating Data


# Day 5: Working with Time Series

# Day 6: Working with Text Data


# Day 7: Working with Categorical Data
