# Pandas

## 1D Indexed Array

In [23]:
import pandas as pd
import numpy as np

In [7]:
# In pandas a one-dimensional labelled array is called a Series.
# No index is passed here, so the default integer index (0, 1, 2, ...) is
# assigned automatically when the Series (or a DataFrame) is created.
pd.Series(data=[0.25, 3, 5, 3])

0    0.25
1    3.00
2    5.00
3    3.00
dtype: float64

In [8]:
# Supply explicit labels instead of relying on the default integer index
pd.Series(
    data=[0.25, 3, 5, 3],
    index=['a', 'b', 'c', 'd'],
)

a    0.25
b    3.00
c    5.00
d    3.00
dtype: float64

In [16]:
# converting dict to series

# NOTE: the mapping is deliberately not named `dict`. Binding that name would
# shadow the built-in type for every later cell (e.g. `dict(a=1)` would raise
# "TypeError: 'dict' object is not callable").
person = {"name" : "Ibad-ur-Rehman",
          "age" : 20}

# Keys become the index labels and values become the data.
my_series = pd.Series(person)
print(my_series,"\n")
print(my_series['age'])  # label-based lookup, like a dictionary key

name    Ibad-ur-Rehman
age                 20
dtype: object 

20


In [17]:
# Arithmetic with a scalar is applied to every element of the Series
s1 = pd.Series(data=[0.25, 3, 5, 3])
s3 = s1 + 3
print(s3)

0    3.25
1    6.00
2    8.00
3    6.00
dtype: float64


In [22]:
# Conditional selection: build a True/False mask, then keep only the
# elements of s1 that are less than or equal to 3
mask = s1 <= 3
s2 = s1[mask]
print(s2)

0    0.25
1    3.00
3    3.00
dtype: float64


In [27]:
# A NumPy array can be passed straight to the Series constructor
np_arr = np.array([13, 23, 45, 44])
pandas_series = pd.Series(data=np_arr)
print(pandas_series)

0    13
1    23
2    45
3    44
dtype: int32


In [29]:
# NumPy functions such as np.sqrt can be applied to a pandas Series directly;
# the result printed below is again a Series carrying the same index.
sqrt = np.sqrt(pandas_series)
print(sqrt)

0    3.605551
1    4.795832
2    6.708204
3    6.633250
dtype: float64


In [31]:
# Build a Series from two NumPy arrays: one supplies the index labels,
# the other supplies the values
index = np.array(['a', 'b', 'c', 'd'])
values = np.array([1, 2, 3, 4])
pd_series = pd.Series(data=values, index=index)
print(pd_series)


a    1
b    2
c    3
d    4
dtype: int32


In [3]:
# .size gives the number of elements held by the Series
s1 = pd.Series(data=[1, 2, 3, 4])
s1.size

4

In [7]:
# mean, max and min of a pandas Series, printed one per line in that order
s1 = pd.Series([12, 34, 45, 56, 67])
for stat in (s1.mean(), s1.max(), s1.min()):
    print(stat)


42.8
67
12


In [12]:
# sorted values (ascending; every value keeps its original index label)
s1 = pd.Series([122, 34, 5, 36, 7])
print(s1.sort_values(), '\n\n')

# unique values (duplicates removed)
s2 = pd.Series([13, 14, 14, 15, 15, 2, 4])
print(s2.unique())
# Number of distinct values. This is asked of s2, the Series that actually
# contains duplicates (7 elements, 5 distinct); s1 has no repeated values,
# so s1.nunique() would only echo its length and demonstrate nothing.
print(s2.nunique())

2      5
4      7
1     34
3     36
0    122
dtype: int64 


[13 14 15  2  4]
5


In [13]:
# describe() gives a short statistical summary of a Series:
# count, mean, std, min, the 25%/50%/75% quartiles and max
series = pd.Series([13, 14, 14, 15, 15, 2, 4])
summary = series.describe()
print(summary)

count     7.000000
mean     11.000000
std       5.537749
min       2.000000
25%       8.500000
50%      14.000000
75%      14.500000
max      15.000000
dtype: float64


In [18]:
# series to dataframe: every Series becomes one column, named by its dict key
book_name = pd.Series(['AI', 'ML'])
book_no = pd.Series([12, 13])
dic = {
    "book": book_name,
    "b_no": book_no,
}
# Plain lists work just as well as column values, e.g.
# {"book": ['AI', 'ML'], "b_no": [12, 13]} builds the same frame.
df = pd.DataFrame(data=dic)
print(df)

  book  b_no
0   AI    12
1   ML    13


In [24]:
# json to df: a list of records (dicts) becomes one row per record.
# The order of keys inside a record does not matter, but a value only lands
# in an existing column when its key matches; a record with a different key
# makes pandas create a new column for it.
l = [
    {"book": "book_name1", "b_no": "book_no1"},
    {"book": "book_name2", "b_no": "book_no2"},
    {"b_no": "book_no3", "book": "book_name3"},
]

df = pd.DataFrame(data=l)
df

Unnamed: 0,book,b_no
0,book_name1,book_no1
1,book_name2,book_no2
2,book_name3,book_no3


In [28]:
# iteration by rows: iterrows() yields one (index label, row) pair per row,
# where the row is a Series keyed by column name
for row_index, row_value in df.iterrows():
    print(row_index, '\n\n', row_value, '\n\n\n')

# column-wise access is demonstrated in the next cell


0 

 book    book_name1
b_no      book_no1
Name: 0, dtype: object 



1 

 book    book_name2
b_no      book_no2
Name: 1, dtype: object 



2 

 book    book_name3
b_no      book_no3
Name: 2, dtype: object 





In [36]:
# iteration by columns: items() yields one (column name, column) pair per
# column, where the column is a Series keyed by row label
for col_index, col_value in df.items():
    print(col_index, '\n\n', col_value, '\n\n\n')

book 

 0    book_name1
1    book_name2
2    book_name3
Name: book, dtype: object 



b_no 

 0    book_no1
1    book_no2
2    book_no3
Name: b_no, dtype: object 





In [46]:
# Add, rename or delete a column
s = pd.Series([12, 32, 2, 4])
df = pd.DataFrame(s)
df.columns = ['list']  # rename: replace the column labels wholesale
df['list1'] = 20  # add: the scalar fills every row of the new column
df['list2'] = df['list1'] + df['list']  # add: element-wise sum of two columns
print(df)
df.pop('list')  # delete: same effect on df as `del df['list']`
print(df)

   list  list1  list2
0    12     20     32
1    32     20     52
2     2     20     22
3     4     20     24
   list1  list2
0     20     32
1     20     52
2     20     22
3     20     24


In [52]:
# deleting using drop: each call returns a new DataFrame and leaves df as it was
new_df1 = df.drop(columns='list1')  # column-wise deletion, same as drop('list1', axis=1)
print(new_df1)
new_df2 = df.drop(index=[2, 3])  # row-wise deletion of the rows labelled 2 and 3
print(new_df2)

   list2
0     32
1     52
2     22
3     24
   list1  list2
0     20     32
1     20     52


In [68]:
# .loc with a boolean-valued index: the row labels themselves are True/False
# here, so .loc[True] / .loc[False] select every row carrying that label.

dic = {"book" : ['AI', 'ML', 'DS', 'DSA'], "b_no" : [12, 13, 14, 15]}
df = pd.DataFrame(data=dic, index=[True, False, True, False])
for flag in (True, False):
    print(df.loc[flag])


     book  b_no
True   AI    12
True   DS    14
      book  b_no
False   ML    13
False  DSA    15


In [74]:
# concatenating
# The first dictionary is bound to `dic`, the name pd.DataFrame(dic) reads on
# the next line, so this cell defines everything it uses and runs correctly
# on a fresh kernel without relying on a variable from an earlier cell.
dic = {"book" : ['AI', 'ML', 'DS', 'DSA'], "b_no" : [12, 13, 14, 15]}
df = pd.DataFrame(dic)
print(df)

dic1 = {"book" : ['Automata', 'Web', 'LA', 'DLD'], "b_no" : [12, 13, 14, 15]}
df2 = pd.DataFrame(dic1)
print(df2)

# concat stacks the rows of df2 underneath those of df. The original row
# labels are kept, so the labels 0-3 appear twice in the result.
df3 = pd.concat([df, df2])
print(df3)

  book  b_no
0   AI    12
1   ML    13
2   DS    14
3  DSA    15
       book  b_no
0  Automata    12
1       Web    13
2        LA    14
3       DLD    15
       book  b_no
0        AI    12
1        ML    13
2        DS    14
3       DSA    15
0  Automata    12
1       Web    13
2        LA    14
3       DLD    15
