**1) convert list into into series of elements**

In [1]:
import numpy as np
import pandas as pd
my_data = [10, 20, 30, 40, 50]
pd.Series(data=my_data)

0    10
1    20
2    30
3    40
4    50
dtype: int64

**2) convert dictionary into series of elements**

In [2]:
d={'a':10, 'b':20, 'c':30, 'd':40}
pd.Series(d)

a    10
b    20
c    30
d    40
dtype: int64

**3) addition of two series**

In [4]:
ser1=pd.Series([1,2,3,4], ["india", "china", "nepal", "russia"])
print(ser1)
ser2=pd.Series([1,2,5,4], ["india", "china", "bhutan", "russia"])
print("\n")
print(ser2)
ser1+ser2

india     1
china     2
nepal     3
russia    4
dtype: int64


india     1
china     2
bhutan    5
russia    4
dtype: int64


bhutan    NaN
china     4.0
india     2.0
nepal     NaN
russia    8.0
dtype: float64

**4) converting into data frame and indexing**

In [5]:
data = {'Name': ['Tom', 'Jack', 'steve', 'Ricky'], 'Age': [28, 34, 29, 42]}
df = pd.DataFrame(data, index=['rank1', 'rank2', 'rank3', 'rank4'])
print(df)

        Name  Age
rank1    Tom   28
rank2   Jack   34
rank3  steve   29
rank4  Ricky   42


**5) create dataframe & select columns**

In [6]:
from numpy.random import randn
import pandas as pd
np.random.seed(101)
df=pd.DataFrame(randn(5, 4), ['A', 'B', 'C', 'D', 'E'], ["W", "X", "Y", "Z"])
print(df)
print("\n")
print(df["W"])
print("\n")
df[["W", "Z"]]

          W         X         Y         Z
A  2.706850  0.628133  0.907969  0.503826
B  0.651118 -0.319318 -0.848077  0.605965
C -2.018168  0.740122  0.528813 -0.589001
D  0.188695 -0.758872 -0.933237  0.955057
E  0.190794  1.978757  2.605967  0.683509


A    2.706850
B    0.651118
C   -2.018168
D    0.188695
E    0.190794
Name: W, dtype: float64




Unnamed: 0,W,Z
A,2.70685,0.503826
B,0.651118,0.605965
C,-2.018168,-0.589001
D,0.188695,0.955057
E,0.190794,0.683509


**6) data manipulation: data selection**

In [7]:
df1 = df.drop("W", axis=1, inplace=True)  #droping column indexed w
print(df1)
df.loc['A']     #fetch particular row from dataset having index 'A'
print(df.iloc[3]) #fetch 3rd row from dataset

None
X   -0.758872
Y   -0.933237
Z    0.955057
Name: D, dtype: float64


In [8]:
df.loc[['A', 'C'], ['X', "Z"]]    #fetch a subset of data from given dataset

Unnamed: 0,X,Z
A,0.628133,0.503826
C,0.740122,-0.589001


In [9]:
print(df > 0)

       X      Y      Z
A   True   True   True
B  False  False   True
C   True   True  False
D  False  False   True
E   True   True   True


In [10]:
print(df[df > 0]) #fetch original values

          X         Y         Z
A  0.628133  0.907969  0.503826
B       NaN       NaN  0.605965
C  0.740122  0.528813       NaN
D       NaN       NaN  0.955057
E  1.978757  2.605967  0.683509


In [13]:
df = pd.DataFrame(randn(5, 4), ['A', 'B', 'C', 'D', 'E'], ["W", "X", "Y", "Z"])
df[df['W'] > 0]
df[df['W'] > 0][['X', 'Y']]
#fetch out desired frame of X & Y from dataset, for those rows where value is more
df.reset_index()  #assign natural index
df.set_index("Z")   #set 'Z' column as index value

Unnamed: 0_level_0,W,X,Y
Z,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0.230336,0.38603,2.084019,-0.376519
1.939932,0.681209,1.035125,-0.03116
-0.732845,-1.005187,-0.74179,0.187125
-2.141212,-1.38292,1.482495,0.961458
1.292765,0.992573,1.192241,-1.04678


**7) data manipulation: drop missing elements**

In [27]:
import pandas as pd
d = {'A': [1, 2, np.NaN], 'B': [1, np.NaN, np.NaN], 'c': [1, 2, 3]}
#np.NaN is the missing element in Dataframe
df = pd.DataFrame(d)
df
df.dropna()   #pandas would drop any row with missing value
df.dropna(axis=1)   #drop column with NULL value
df.dropna(thresh=2)

Unnamed: 0,A,B,c
0,1.0,1.0,1
1,2.0,,2


**8) data manipulation: filling suitable value**

In [28]:
df.fillna(value = 'FILL VALUE')   #NaN is replaced by value = FILL VALUE
df['A'].fillna(value=df["A"].mean())
#select column "A" and fill the missing value with mean value of the column A OR
df['A'].fillna(value=df['A'].std())
#select column "A" and fill the missing value with standard deviation value of the column

0    1.000000
1    2.000000
2    0.707107
Name: A, dtype: float64

**9) replacing NA with a scalar value is equivalent behaviour of the fillna() function**

In [29]:
df = pd.DataFrame({'one': [10, 20, 30, 40, 50, 2000], 'two': [1000, 0, 30, 40, 50, 60]})
print(df)
print("\n")
print(df.replace({1000: 10, 2000: 60}))

    one   two
0    10  1000
1    20     0
2    30    30
3    40    40
4    50    50
5  2000    60


   one  two
0   10   10
1   20    0
2   30   30
3   40   40
4   50   50
5   60   60


**10) Groupby() function**

In [41]:
data = {"Company": ["CompA", "CompA", "CompB", "CompB", "CompC", "CompC"],
        "Person": ["Rajesh", "Pradeep", "Amit", "Rakesh", "Suresh", "Raj"],
        "Sales": [200, 120, 340, 124, 243, 350]}
df = pd.DataFrame(data)
df
comp = df.groupby("Company")["Sales"].mean()
comp
# comp1 = df.groupby("Company")["Sales"]   #grouping done using label name "Company"
# comp1.std()     #apply standard deviation on grouped data

Company
CompA    160.0
CompB    232.0
CompC    296.5
Name: Sales, dtype: float64