### pandas is a software library written for the Python programming language for data manipulation and analysis. In particular, it offers data structures and operations for manipulating numerical tables and time series

### 1. Series

##### A Pandas Series is like a column in a table. It is a one-dimensional array holding data of any type.

In [1]:
import pandas as pd

In [2]:
# numpy supplies the arrays and random numbers used to build the Series and DataFrame examples
import numpy as np

In [3]:
s = pd.Series(data=np.random.random(size=5))  # Series of five uniform random floats in [0, 1)

In [4]:
s

0    0.715964
1    0.810554
2    0.177299
3    0.968543
4    0.194142
dtype: float64

In [5]:
s[0]   # indexing

0.7159644339728473

In [6]:
s[2:5]  # slicing

2    0.177299
3    0.968543
4    0.194142
dtype: float64

In [7]:
s[0] = 567 # update the value

In [8]:
s

0    567.000000
1      0.810554
2      0.177299
3      0.968543
4      0.194142
dtype: float64

In [9]:
s.index = ["a","b","c","d","e"] # change the index names

In [10]:
s

a    567.000000
b      0.810554
c      0.177299
d      0.968543
e      0.194142
dtype: float64

In [14]:
# the index now holds the labels "a".."e", so read the first element by
# position with .iloc; s[0] would rely on the deprecated fallback that treats
# an integer key as a position when the index is not integer-based
s.iloc[0]

567.0

In [15]:
# build a four-element Series of random floats with explicit index labels
s = pd.Series(data=np.random.random(size=4), index=list("abcd"))

In [16]:
s

a    0.142164
b    0.442852
c    0.272149
d    0.685654
dtype: float64

In [17]:
# integer position of the maximum value
s.argmax()

3

In [18]:
# integer positions that would sort the values (use s.sort_values() to get the sorted data itself)
s.argsort()

a    0
b    2
c    1
d    3
dtype: int64

In [19]:
# cumulative sum of the data
s.cumsum()

a    0.142164
b    0.585015
c    0.857164
d    1.542818
dtype: float64

In [20]:
# checking the mean
s.mean()

0.3857045350296076

In [24]:
# checking the median
s.median()

0.6424363827309403

In [25]:
# checking the max value
s.max()

0.9820689813508973

In [26]:
# checking the minimum value
s.min()

0.22478259457858052

In [21]:
# checking the descriptive stats of the data
s.describe()

count    4.000000
mean     0.385705
std      0.234835
min      0.142164
25%      0.239652
50%      0.357500
75%      0.503552
max      0.685654
dtype: float64

## 2. DataFrame

In [22]:
s1 = pd.Series(data=np.random.random(size=5))  # first row source: five random floats

In [23]:
s2 = pd.Series(data=np.random.random(size=5))  # second row source: five random floats

In [24]:
s1,s2

(0    0.738462
 1    0.360250
 2    0.812278
 3    0.707691
 4    0.501180
 dtype: float64,
 0    0.435553
 1    0.483788
 2    0.295866
 3    0.192150
 4    0.176405
 dtype: float64)

In [27]:
# creating a dataframe using series object
a= pd.DataFrame([s1,s2])

In [28]:
a

Unnamed: 0,0,1,2,3,4
0,0.738462,0.36025,0.812278,0.707691,0.50118
1,0.435553,0.483788,0.295866,0.19215,0.176405


In [30]:
# column-oriented source data for a DataFrame: each key is a column name
# (age, salary) and each list holds that column's values
d = dict(age=[22, 23, 24], salary=[1000, 1020, 1040])

In [31]:
d

{'age': [22, 23, 24], 'salary': [1000, 1020, 1040]}

In [34]:
type(d)

dict

In [32]:
# converting the dict to the dataframe
data = pd.DataFrame(d)

In [33]:
data

Unnamed: 0,age,salary
0,22,1000
1,23,1020
2,24,1040


In [35]:
type(data)

pandas.core.frame.DataFrame

In [36]:
# two separate lists with the same values; each becomes one row of the DataFrame
a = list(range(1, 5))
b = list(a)

In [37]:
pd.DataFrame([a,b])

Unnamed: 0,0,1,2,3
0,1,2,3,4
1,1,2,3,4


In [38]:
# giving the column names to the data
pd.DataFrame([a,b],columns=['a','b','e','f'])

Unnamed: 0,a,b,e,f
0,1,2,3,4
1,1,2,3,4


In [39]:
data

Unnamed: 0,age,salary
0,22,1000
1,23,1020
2,24,1040


In [40]:
# adding column to the data frame - gender
data['gender'] = ["male","female","male"]

In [41]:
data

Unnamed: 0,age,salary,gender
0,22,1000,male
1,23,1020,female
2,24,1040,male


In [42]:
# iloc (position-based) and loc (label-based) indexers
data.iloc[2]

age         24
salary    1040
gender    male
Name: 2, dtype: object

In [43]:
data.loc[2]

age         24
salary    1040
gender    male
Name: 2, dtype: object

In [44]:
data.iloc[:,2] # extract column information

0      male
1    female
2      male
Name: gender, dtype: object

In [45]:
data.iloc[3] # raises IndexError: position 3 is out of bounds (only positions 0-2 exist)

IndexError: single positional indexer is out-of-bounds

In [46]:
data['salary'] # retrieving the values using the column name

0    1000
1    1020
2    1040
Name: salary, dtype: int64

In [47]:
data.salary

0    1000
1    1020
2    1040
Name: salary, dtype: int64

In [48]:
# iloc cannot add a new row: it cannot enlarge the object
data.iloc[3] =[22,333,"male"]

IndexError: iloc cannot enlarge its target object

In [49]:
data.loc[3] =[22,333,"male"] # creating a row using loc function

In [50]:
data

Unnamed: 0,age,salary,gender
0,22,1000,male
1,23,1020,female
2,24,1040,male
3,22,333,male


In [51]:
data.iloc[1,1] # extract the specified value using iloc

1020

In [52]:
data.shape # displays the rows and columns in the dataset

(4, 3)

In [53]:
data.columns # gives the column names

Index(['age', 'salary', 'gender'], dtype='object')

In [54]:
data.values # gives values of the dataset

array([[22, 1000, 'male'],
       [23, 1020, 'female'],
       [24, 1040, 'male'],
       [22, 333, 'male']], dtype=object)

In [55]:
data.dtypes # gives the data types

age        int64
salary     int64
gender    object
dtype: object

In [56]:
# combine two datasets
pd.concat([data,data])

Unnamed: 0,age,salary,gender
0,22,1000,male
1,23,1020,female
2,24,1040,male
3,22,333,male
0,22,1000,male
1,23,1020,female
2,24,1040,male
3,22,333,male


In [57]:
# drop a column from the dataset; this returns a new DataFrame and
# leaves `data` itself unchanged
data.drop(columns=["gender"])

Unnamed: 0,age,salary
0,22,1000
1,23,1020
2,24,1040
3,22,333


In [58]:
data

Unnamed: 0,age,salary,gender
0,22,1000,male
1,23,1020,female
2,24,1040,male
3,22,333,male


In [59]:
# drop the column from `data` itself (inplace=True modifies the existing
# DataFrame), then display the result
data.drop(columns=["gender"], inplace=True)
data

Unnamed: 0,age,salary
0,22,1000
1,23,1020
2,24,1040
3,22,333


In [63]:
data.loc[4] = [22,1000] # creating duplicate values

In [64]:
data

Unnamed: 0,age,salary
0,22,1000
1,23,1020
2,24,1040
3,22,333
4,22,1000


In [65]:
# dropping the duplicate rows
data.drop_duplicates(inplace=True)

In [66]:
data

Unnamed: 0,age,salary
0,22,1000
1,23,1020
2,24,1040
3,22,333


In [67]:
import numpy as np

In [68]:
# creating missing values
data['Experience'] = [1,np.nan,3,4]

In [69]:
data

Unnamed: 0,age,salary,Experience
0,22,1000,1.0
1,23,1020,
2,24,1040,3.0
3,22,333,4.0


In [70]:
# fill the missing values; assign the result back to the column instead of
# calling fillna(inplace=True) on data['Experience'], because that selection
# can be a temporary object and the in-place call may leave `data` unchanged
# (chained assignment; it never updates `data` under pandas Copy-on-Write)
data['Experience'] = data['Experience'].fillna(2)

In [71]:
data

Unnamed: 0,age,salary,Experience
0,22,1000,1.0
1,23,1020,2.0
2,24,1040,3.0
3,22,333,4.0


In [72]:
# replacing values in the dataset (4.0 -> 5.0); assign the result back to the
# column instead of calling replace(inplace=True) on data["Experience"],
# because that selection can be a temporary object and the in-place call may
# leave `data` unchanged (chained assignment under pandas Copy-on-Write)
data["Experience"] = data["Experience"].replace({4.0: 5.0})

In [73]:
data

Unnamed: 0,age,salary,Experience
0,22,1000,1.0
1,23,1020,2.0
2,24,1040,3.0
3,22,333,5.0


In [78]:
# checking for missing values
data.isnull() 

Unnamed: 0,age,salary,Experience
0,False,False,False
1,False,False,False
2,False,False,False
3,False,False,False
