* Collecting and Storing
* Filtering and Sampling
* Filling and Removing Duplicates
* Merging and Splitting

In [1]:
import pandas as pd
import numpy as np
# A Series built from a plain list; no index is given, so pandas labels the
# elements 0..3 automatically.
obj = pd.Series(data=[4, 7, -5, 3])
print(obj)

from random import randrange as rr

0    4
1    7
2   -5
3    3
dtype: int64


In [2]:
sales = pd.Series(data=[100, 200, 300, 400])
# Show the underlying value array first, then the auto-generated index,
# each on its own line.
print(sales.values, sales.index, sep="\n")

[100 200 300 400]
RangeIndex(start=0, stop=4, step=1)


In [3]:
# Same values as before, now labelled by month instead of by position.
sales = pd.Series(
    data=[100, 200, 300, 400],
    index=["Jan", "Feb", "Mar", "Apr"],
)
# Whole Series, then its raw values, then its index — one item per line.
print(sales, sales.values, sales.index, sep="\n")

Jan    100
Feb    200
Mar    300
Apr    400
dtype: int64
[100 200 300 400]
Index(['Jan', 'Feb', 'Mar', 'Apr'], dtype='object')


In [4]:
# Month -> amount mapping; the dict keys become the index (in insertion order)
# and `name` labels the Series as a whole.
sales = pd.Series(
    {"Jan": 100, "Feb": 200, "Mar": 300, "Apr": 400},
    name="4 Month Sales",
)
print(sales)

Jan    100
Feb    200
Mar    300
Apr    400
Name: 4 Month Sales, dtype: int64


In [5]:
# Boolean mask: keep only the months whose sales are strictly above 100.
print(sales.loc[sales.gt(100)])

Feb    200
Mar    300
Apr    400
Name: 4 Month Sales, dtype: int64


In [6]:
# Bare last expression: the notebook shows the Series as this cell's output.
sales

Jan    100
Feb    200
Mar    300
Apr    400
Name: 4 Month Sales, dtype: int64

In [7]:
# Label lookups go through plain []; position lookups go through .iloc.
# `sales` has a string index, so integer keys passed to [] only worked through
# a positional fallback that pandas deprecated in 2.1 (integers are to be
# treated as labels). Using .iloc states the intent explicitly and avoids it.
print(sales["Jan"])             # single label
print(sales.iloc[2])            # third element, by position
print(sales[["Feb", "Apr"]])    # several labels at once
print(sales.iloc[[0, 2]])       # first and third elements, by position

100
300
Feb    200
Apr    400
Name: 4 Month Sales, dtype: int64
Jan    100
Mar    300
Name: 4 Month Sales, dtype: int64


In [8]:
# One value per weekday: 100, 200, ..., 700, labelled mon..sun.
data = pd.Series(
    range(100, 800, 100),
    index=["mon", "tues", "wed", "thurs", "fri", "sat", "sun"],
)

In [9]:
from random import randrange as rr 
# Seven random integers drawn from [10, 50), one per weekday.
series = pd.Series(
    [rr(10, 50) for _ in range(7)],
    index=["mon", "tues", "wed", "thurs", "fri", "sat", "sun"],
)

# First print shows a doubled copy; `series` itself is still unchanged here.
print(series * 2)
# Now double `series` itself, so the next print matches the one above.
series *= 2
print(series)
# Select two entries by label.
print(series[["mon", "tues"]])

mon      94
tues     44
wed      26
thurs    24
fri      74
sat      52
sun      28
dtype: int64
mon      94
tues     44
wed      26
thurs    24
fri      74
sat      52
sun      28
dtype: int64
mon     94
tues    44
dtype: int64


In [10]:
sdata = {
    "Sindh": 35000,
    "punjab": 45000,
    "KPK": 30000,
    "Balochistan": 200000,
}
# The explicit index decides both order and membership: labels that are not
# keys of sdata ("GB", "Another") come out as NaN.
tax_by_state = pd.Series(
    sdata,
    index=["punjab", "Sindh", "KPK", "Balochistan", "GB", "Another"],
)
print(tax_by_state)
# Element-wise missing-value mask.
print(tax_by_state.isnull())
# Fill one of the gaps by hand ...
tax_by_state.loc["GB"] = 200
print(tax_by_state)
# ... and show a copy with the remaining gap filled; tax_by_state keeps its NaN.
print(tax_by_state.fillna(800))

punjab          45000.0
Sindh           35000.0
KPK             30000.0
Balochistan    200000.0
GB                  NaN
Another             NaN
dtype: float64
punjab         False
Sindh          False
KPK            False
Balochistan    False
GB              True
Another         True
dtype: bool
punjab          45000.0
Sindh           35000.0
KPK             30000.0
Balochistan    200000.0
GB                200.0
Another             NaN
dtype: float64
punjab          45000.0
Sindh           35000.0
KPK             30000.0
Balochistan    200000.0
GB                200.0
Another           800.0
dtype: float64


In [14]:
# Two Series with completely different index labels:
# letters for apples, weekdays for oranges.
apples = pd.Series([1, 2, 3, 4], index=list("abcd"))
oranges = pd.Series([5, 6, 7, 8], index=["mon", "tue", "wed", "thu"])

data = dict(apples=apples, oranges=oranges)
print(data, "\n")
# Building a frame from the dict aligns both columns on the combined set of
# labels; any cell without a value for its label is NaN.
fruits_df = pd.DataFrame(data)
print(fruits_df)

{'apples': a    1
b    2
c    3
d    4
dtype: int64, 'oranges': mon    5
tue    6
wed    7
thu    8
dtype: int64} 

     apples  oranges
a       1.0      NaN
b       2.0      NaN
c       3.0      NaN
d       4.0      NaN
mon     NaN      5.0
thu     NaN      8.0
tue     NaN      6.0
wed     NaN      7.0


In [17]:
# Relabel apples with the weekday index of oranges so the two Series share
# labels and line up row for row.
apples.index = oranges.index
data = dict(apples=apples, oranges=oranges)
print(data, "\n")
# With matching labels the frame has four rows and no missing values.
fruits_df = pd.DataFrame(data)
print(fruits_df)

{'apples': mon    1
tue    2
wed    3
thu    4
dtype: int64, 'oranges': mon    5
tue    6
wed    7
thu    8
dtype: int64} 

     apples  oranges
mon       1        5
tue       2        6
wed       3        7
thu       4        8


In [3]:
state = ["Ohio"] * 3 + ["Nevada"] * 3
data = {
    "state": state,
    "year": list(range(2000, 2006)),
    "pop": [1.5, 1.7, 3.6, 7.9, 2.5, 8.2],
}
# `columns` fixes the column order; "debt" is not a key of `data`, so that
# column is created with no values (NaN in every row).
state_pop_df = pd.DataFrame(
    data,
    columns=["year", "state", "pop", "debt"],
    index=["1st", "2nd", "3rd", "4th", "5th", "6th"],
)
# Preview the first five rows.
state_pop_df.head()

Unnamed: 0,year,state,pop,debt
1st,2000,Ohio,1.5,
2nd,2001,Ohio,1.7,
3rd,2002,Ohio,3.6,
4th,2003,Nevada,7.9,
5th,2004,Nevada,2.5,


In [4]:
# Fill the "debt" column with 0, 1, ..., n-1 — one value per row.
rng = np.arange(len(state_pop_df))
state_pop_df["debt"] = rng
state_pop_df

Unnamed: 0,year,state,pop,debt
1st,2000,Ohio,1.5,0
2nd,2001,Ohio,1.7,1
3rd,2002,Ohio,3.6,2
4th,2003,Nevada,7.9,3
5th,2004,Nevada,2.5,4
6th,2005,Nevada,8.2,5


In [13]:
# Build a second frame from the `data` dict defined in an earlier cell, this
# time with word labels for the rows. "debt" is requested as a column but is
# given no values here.
frame2 = pd.DataFrame(
    data,
    columns=["year", "state", "pop", "debt"],
    index=["one", "two", "three", "four", "five", "six"],
)

# One integer per row of frame2: 0 .. len(frame2) - 1.
ln = len(frame2)
rng = np.arange(ln)
print(rng)
frame2

[0 1 2 3 4 5]


Unnamed: 0,year,state,pop,debt
one,2000,Ohio,1.5,
two,2001,Ohio,1.7,
three,2002,Ohio,3.6,
four,2003,Nevada,7.9,
five,2004,Nevada,2.5,
six,2005,Nevada,8.2,


In [14]:
# Values keyed by row label. Only some labels exist in frame2.
val = pd.Series(
    {
        "two": -1.2,
        "four": -1.5,
        "five": -1.7,
        "six": 2.6,
        "seven": 3.6,
        "eight": 4.6,
    }
)
lst = np.array(val.values)
# Assigning a Series matches on labels, not position: rows with no entry in
# `val` ("one", "three") stay NaN, and labels frame2 lacks ("seven", "eight")
# are dropped.
frame2["debt"] = val
frame2

Unnamed: 0,year,state,pop,debt
one,2000,Ohio,1.5,
two,2001,Ohio,1.7,-1.2
three,2002,Ohio,3.6,
four,2003,Nevada,7.9,-1.5
five,2004,Nevada,2.5,-1.7
six,2005,Nevada,8.2,2.6


In [17]:
# Three colours placed at labels 0, 3 and 6.
obj3 = pd.Series(data=["blue", "purple", "yellow"], index=[0, 3, 6])
print(obj3)
# Reindex onto 0..8: the existing labels keep their values, and every newly
# introduced label gets NaN.
obj3 = obj3.reindex(index=range(9))
obj3

0      blue
3    purple
6    yellow
dtype: object


0      blue
1       NaN
2       NaN
3    purple
4       NaN
5       NaN
6    yellow
7       NaN
8       NaN
dtype: object

In [22]:
# 3x3 grid of 0..8 with row labels a, c, d (note: no "b").
states = pd.DataFrame(
    np.arange(9).reshape(3, 3),
    index=["a", "c", "d"],
    columns=["Ohio", "Texas", "California"],
)
print(states)
# Reindex the rows to a..d. With forward fill, the new row "b" takes its
# values from the preceding row "a" instead of being left as NaN.
states = states.reindex(index=["a", "b", "c", "d"], method="ffill")
print(states)

   Ohio  Texas  California
a     0      1           2
c     3      4           5
d     6      7           8
   Ohio  Texas  California
a     0      1           2
b     0      1           2
c     3      4           5
d     6      7           8


# Assignment
#### Study Chapter 5.

In [None]:
s = pd.