# Practice on Pandas Library
------
1.  Pandas is designed for working with tabular or heterogeneous data.
2.  NumPy, by contrast, is best suited to homogeneous numerical array data.

In [1]:
# Series: a one-dimensional labelled array

import pandas as pd

# No index is supplied, so pandas assigns the default integer labels 0..n-1.
obj = pd.Series(data=[4, 7, -5, 3])
obj

0    4
1    7
2   -5
3    3
dtype: int64

In [2]:
# The pandas documentation recommends to_numpy() over the older .values
# attribute; for this integer Series both give the same NumPy array.
obj.to_numpy()

array([ 4,  7, -5,  3], dtype=int64)

In [3]:
# obj was created without an explicit index, so this shows the default one.
obj.index

RangeIndex(start=0, stop=4, step=1)

In [6]:
# Supplying our own labels: the i-th label is paired with the i-th value.

obj2 = pd.Series(
    data=[4, 7, -5, 3],
    index=["d", "b", "a", "c"],
)
obj2

d    4
b    7
a   -5
c    3
dtype: int64

In [7]:
# The labels passed through index= are kept in the order they were given.
obj2.index


Index(['d', 'b', 'a', 'c'], dtype='object')

>#### Series in Pandas are Just like Dictionary in Python 

In [8]:
# A dictionary converts directly into a Series:
# its keys become the index labels and its values become the data.

s_data = {
    "Ohio": 35000,
    "Texas": 71000,
    "Oregon": 16000,
    "Utah": 5000,
}

obj3 = pd.Series(data=s_data)
obj3

Ohio      35000
Texas     71000
Oregon    16000
Utah       5000
dtype: int64

#### We can also build a Series from the same dictionary with an index of our choice

In [9]:
# Only the labels requested here end up in the result: "California" has no
# entry in s_data, so it becomes NaN, and "Utah" is left out because it is
# not requested.
states = [
    "California",
    "Ohio",
    "Oregon",
    "Texas",
]
obj4 = pd.Series(data=s_data, index=states)
obj4

California        NaN
Ohio          35000.0
Oregon        16000.0
Texas         71000.0
dtype: float64

In [10]:
# "California" has no entry in s_data, so its value is missing and is shown as NaN (Not a Number).
# Missing entries can be detected with "isnull" (True where missing) or "notnull" (True where present).

pd.isnull(obj4)

California     True
Ohio          False
Oregon        False
Texas         False
dtype: bool

In [11]:
# The same check is also available as a method on the Series itself

obj4.isnull()

California     True
Ohio          False
Oregon        False
Texas         False
dtype: bool

# Data Frames in Pandas
-----------


In [13]:
# A dictionary of equal-length lists: every key becomes a column and the
# rows receive the default integer index.
data = {
    "states": ["Ohio", "Ohio", "Ohio", "Nevada", "Nevada", "Nevada"],
    "year": [2000, 2001, 2002, 2001, 2002, 2003],
    "pop": [1.5, 1.7, 3.6, 2.4, 2.9, 3.2],
}

frame = pd.DataFrame(data=data)
frame

Unnamed: 0,states,year,pop
0,Ohio,2000,1.5
1,Ohio,2001,1.7
2,Ohio,2002,3.6
3,Nevada,2001,2.4
4,Nevada,2002,2.9
5,Nevada,2003,3.2


In [15]:
# The columns= argument fixes the order in which the columns appear.
frame = pd.DataFrame(
    data,
    columns=["year", "states", "pop"],
)
frame

Unnamed: 0,year,states,pop
0,2000,Ohio,1.5
1,2001,Ohio,1.7
2,2002,Ohio,3.6
3,2001,Nevada,2.4
4,2002,Nevada,2.9
5,2003,Nevada,3.2


In [16]:
# A column of a data frame can be retrieved as a pandas Series using its name as a key

series1 = frame["states"]
series1

0      Ohio
1      Ohio
2      Ohio
3    Nevada
4    Nevada
5    Nevada
Name: states, dtype: object

In [17]:
# Another way of accessing a column of a data frame: attribute-style access.
# This only works when the column name is a valid Python identifier that does
# not clash with an existing DataFrame attribute or method.
frame.year

0    2000
1    2001
2    2002
3    2001
4    2002
5    2003
Name: year, dtype: int64

In [18]:
# Any row can be retrieved by its index label with the frame.loc[] indexer
# (loc is used with square brackets; it is not called like a function).

frame.loc[3]

year        2001
states    Nevada
pop          2.4
Name: 3, dtype: object

In [19]:
# "debt" is requested as a column but is not a key of data, so the column is
# created with missing values; index= replaces the default row labels.
frame2 = pd.DataFrame(
    data,
    columns=["year", "states", "pop", "debt"],
    index=["one", "two", "three", "four", "five", "six"],
)
frame2

Unnamed: 0,year,states,pop,debt
one,2000,Ohio,1.5,
two,2001,Ohio,1.7,
three,2002,Ohio,3.6,
four,2001,Nevada,2.4,
five,2002,Nevada,2.9,
six,2003,Nevada,3.2,


In [20]:
# Columns of a data frame can be modified by assignment.
# The debt column above holds only NaN, so we put some data into it:
# assigning a single value fills every row of the column with that value.

frame2["debt"]=6
frame2

Unnamed: 0,year,states,pop,debt
one,2000,Ohio,1.5,6
two,2001,Ohio,1.7,6
three,2002,Ohio,3.6,6
four,2001,Nevada,2.4,6
five,2002,Nevada,2.9,6
six,2003,Nevada,3.2,6


In [21]:
# Assigning a 1-D array to the debt column

import numpy as np

# The array needs exactly one element per row, so its length is taken from
# the frame instead of being hard-coded as 6.
frame2["debt"] = np.arange(len(frame2))
frame2

Unnamed: 0,year,states,pop,debt
one,2000,Ohio,1.5,0
two,2001,Ohio,1.7,1
three,2002,Ohio,3.6,2
four,2001,Nevada,2.4,3
five,2002,Nevada,2.9,4
six,2003,Nevada,3.2,5


In [22]:
# A Series that will be assigned to the debt column; it carries the same
# row labels that frame2 uses.

val = pd.Series(
    data=[1.2, 1.3, 1.4, 1.5, 1.6, 1.7],
    index=["one", "two", "three", "four", "five", "six"],
)
val

one      1.2
two      1.3
three    1.4
four     1.5
five     1.6
six      1.7
dtype: float64

In [23]:
# When a Series is assigned to a column, its values are matched to the rows
# by index label; val uses the same labels as frame2, so every row is filled.
frame2["debt"]= val
frame2

Unnamed: 0,year,states,pop,debt
one,2000,Ohio,1.5,1.2
two,2001,Ohio,1.7,1.3
three,2002,Ohio,3.6,1.4
four,2001,Nevada,2.4,1.5
five,2002,Nevada,2.9,1.6
six,2003,Nevada,3.2,1.7


### Deleting Columns of a DataFrame

In [25]:
# Assigning to a column name that does not exist yet creates the column.
# Here it holds True wherever the state is "Ohio" and False elsewhere.
frame2["extension"] = (frame2["states"] == "Ohio")
frame2

Unnamed: 0,year,states,pop,debt,extension
one,2000,Ohio,1.5,1.2,True
two,2001,Ohio,1.7,1.3,True
three,2002,Ohio,3.6,1.4,True
four,2001,Nevada,2.4,1.5,False
five,2002,Nevada,2.9,1.6,False
six,2003,Nevada,3.2,1.7,False


In [26]:
# The del statement removes a column from the data frame in place

del frame2["extension"]
frame2

Unnamed: 0,year,states,pop,debt
one,2000,Ohio,1.5,1.2
two,2001,Ohio,1.7,1.3
three,2002,Ohio,3.6,1.4
four,2001,Nevada,2.4,1.5
five,2002,Nevada,2.9,1.6
six,2003,Nevada,3.2,1.7


# DataFrame from a Nested Dictionary (Dictionary of Dictionaries)


In [27]:
# Outer keys are state names; each inner dictionary maps a year to a value.
pop = {
    "Nevada": {2001: 2.4, 2002: 2.9},
    "Ohio": {2000: 1.5, 2001: 1.7, 2002: 3.6},
}
pop

{'Nevada': {2001: 2.4, 2002: 2.9}, 'Ohio': {2000: 1.5, 2001: 1.7, 2002: 3.6}}

In [28]:
# With a dictionary of dictionaries, the outer keys become the columns and
# the inner keys become the row labels; combinations that are absent
# (Nevada in 2000) are filled with NaN.
frame3 = pd.DataFrame(pop)
frame3

Unnamed: 0,Nevada,Ohio
2001,2.4,1.7
2002,2.9,3.6
2000,,1.5


In [29]:
# .T gives the transpose: rows and columns swap places.
frame3.T

Unnamed: 0,2001,2002,2000
Nevada,2.4,2.9,
Ohio,1.7,3.6,1.5


In [30]:
# Passing index= explicitly sets which row labels appear and in what order
# (here the years in ascending order).
frame3 = pd.DataFrame(pop, index=[2000,2001,2002])
frame3

Unnamed: 0,Nevada,Ohio
2000,,1.5
2001,2.4,1.7
2002,2.9,3.6


In [32]:
# Name both axes; the names are shown in the header of the displayed frame.
frame3.index.name = "year"
frame3.columns.name = "States"
frame3

States,Nevada,Ohio
year,Unnamed: 1_level_1,Unnamed: 2_level_1
2000,,1.5
2001,2.4,1.7
2002,2.9,3.6


In [33]:
# The pandas documentation recommends DataFrame.to_numpy() over .values;
# it returns the frame's data as a two-dimensional NumPy array.
frame3.to_numpy()

array([[nan, 1.5],
       [2.4, 1.7],
       [2.9, 3.6]])

## Pandas Practice On Population Database

In [1]:
# Importing libraries

import pandas as pd
import numpy as np

# Loading the population dataset.
# The path is relative, so the CSV file must be in the notebook's working directory.

p_data = pd.read_csv("FAOSTAT_data_1-12-2022.csv")
p_data

Unnamed: 0,Domain Code,Domain,Area Code (FAO),Area,Element Code,Element,Item Code,Item,Year Code,Year,Unit,Value,Flag,Flag Description,Note
0,OA,Annual population,2,Afghanistan,511,Total Population - Both sexes,3010,Population - Est. & Proj.,1950,1950,1000 persons,7752.118,X,International reliable sources,"UNDESA, Population Division ? World Population..."
1,OA,Annual population,2,Afghanistan,512,Total Population - Male,3010,Population - Est. & Proj.,1950,1950,1000 persons,4099.243,X,International reliable sources,"UNDESA, Population Division ? World Population..."
2,OA,Annual population,2,Afghanistan,513,Total Population - Female,3010,Population - Est. & Proj.,1950,1950,1000 persons,3652.874,X,International reliable sources,"UNDESA, Population Division ? World Population..."
3,OA,Annual population,2,Afghanistan,551,Rural population,3010,Population - Est. & Proj.,1950,1950,1000 persons,7286.991,X,International reliable sources,
4,OA,Annual population,2,Afghanistan,561,Urban population,3010,Population - Est. & Proj.,1950,1950,1000 persons,465.127,X,International reliable sources,"UNDESA, Population Division ? World Urbanizati..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
70580,OA,Annual population,181,Zimbabwe,511,Total Population - Both sexes,3010,Population - Est. & Proj.,2018,2018,1000 persons,14438.802,X,International reliable sources,
70581,OA,Annual population,181,Zimbabwe,512,Total Population - Male,3010,Population - Est. & Proj.,2018,2018,1000 persons,6879.119,X,International reliable sources,
70582,OA,Annual population,181,Zimbabwe,513,Total Population - Female,3010,Population - Est. & Proj.,2018,2018,1000 persons,7559.693,X,International reliable sources,
70583,OA,Annual population,181,Zimbabwe,551,Rural population,3010,Population - Est. & Proj.,2018,2018,1000 persons,11465.748,X,International reliable sources,


In [2]:
# shape is a (number of rows, number of columns) tuple
p_data.shape

(70585, 15)

In [3]:
# Pull the "Area" column out of the data frame as a Series.
# NOTE(review): the variable name suggests Pakistan only, but this holds the
# Area value of every row (Afghanistan ... Zimbabwe) -- confirm the intent.
pakistan = p_data["Area"]
pakistan

0        Afghanistan
1        Afghanistan
2        Afghanistan
3        Afghanistan
4        Afghanistan
            ...     
70580       Zimbabwe
70581       Zimbabwe
70582       Zimbabwe
70583       Zimbabwe
70584       Zimbabwe
Name: Area, Length: 70585, dtype: object

In [10]:
# Keep only the columns needed for the analysis.
# Selecting with a list of labels raises KeyError for a misspelled column
# name, whereas pd.DataFrame(p_data, columns=[...]) would silently add an
# all-NaN column instead. .copy() makes the result independent of p_data.
# NOTE(review): despite its name, pak still contains every area at this
# point, not only Pakistan -- confirm that a later cell applies the filter.
pak = p_data[
    ["Area Code (FAO)", "Area", "Year", "Value", "Element", "Element Code"]
].copy()
pak

Unnamed: 0,Area Code (FAO),Area,Year,Value,Element,Element Code
0,2,Afghanistan,1950,7752.118,Total Population - Both sexes,511
1,2,Afghanistan,1950,4099.243,Total Population - Male,512
2,2,Afghanistan,1950,3652.874,Total Population - Female,513
3,2,Afghanistan,1950,7286.991,Rural population,551
4,2,Afghanistan,1950,465.127,Urban population,561
...,...,...,...,...,...,...
70580,181,Zimbabwe,2018,14438.802,Total Population - Both sexes,511
70581,181,Zimbabwe,2018,6879.119,Total Population - Male,512
70582,181,Zimbabwe,2018,7559.693,Total Population - Female,513
70583,181,Zimbabwe,2018,11465.748,Rural population,551
