<a href="https://colab.research.google.com/github/AvinashShrikhande/Pandas-Tricks/blob/main/Pandas_for_DataFrame.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np


### **1. Create an example DataFrame**

In [None]:
# Build a small example frame from a dict: each key becomes a column label
# and each list supplies that column's values.
df = pd.DataFrame(
    data={
        'col one': [100, 200],
        'col two': [300, 400],
    }
)
df

Unnamed: 0,col one,col two
0,100,300
1,200,400


In [None]:
# A 4x8 frame of random floats; with no labels given, the columns are numbered 0-7.
pd.DataFrame(data=np.random.rand(4, 8))

Unnamed: 0,0,1,2,3,4,5,6,7
0,0.259616,0.201086,0.982712,0.746865,0.242824,0.004244,0.557932,0.543363
1,0.226521,0.083115,0.694874,0.111572,0.466235,0.883843,0.02844,0.918146
2,0.246831,0.48523,0.838053,0.859808,0.775287,0.824909,0.749766,0.897438
3,0.862735,0.596312,0.457055,0.4235,0.052005,0.307317,0.942778,0.473067


In [None]:
# Give one label per column: list('abcdefgh') splits the 8-character string
# into 8 single-letter names, matching the 8 columns of the array.
pd.DataFrame(np.random.rand(4, 8), columns=list('abcdefgh'))

Unnamed: 0,a,b,c,d,e,f,g,h
0,0.719095,0.968299,0.054972,0.351252,0.914785,0.140996,0.924949,0.797554
1,0.875943,0.458579,0.136004,0.272966,0.122578,0.613794,0.113736,0.708045
2,0.890696,0.187624,0.36547,0.142295,0.573072,0.327551,0.728319,0.99121
3,0.844225,0.631421,0.347171,0.166562,0.577317,0.840579,0.853156,0.280139


### **2. Rename columns**

In [None]:
# Show the example frame before renaming: the column labels still contain spaces.
df

Unnamed: 0,col one,col two
0,100,300
1,200,400


In [None]:
# Approach 1: rename() with a mapping of old label -> new label.
# rename() returns a new frame, so the result is assigned back to df.
df = df.rename(columns={'col one': 'col_one', 'col two': 'col_two'})

In [None]:
# Approach 2: overwrite the .columns attribute directly.
# First restore the original spaced labels (Approach 1 already renamed them),
# then replace every space with an underscore via the vectorised .str accessor.
# regex=False states explicitly that ' ' is a literal, not a pattern.
df.columns = ['col one', 'col two']
df.columns = df.columns.str.replace(' ', '_', regex=False)

In [None]:
# Confirm the result: both column labels now use underscores instead of spaces.
df

In [None]:
# Prepend 'X_' to every column label; a new frame is returned and df is unchanged.
df.add_prefix(prefix='X_')

Unnamed: 0,X_col_one,X_col_two
0,100,300
1,200,400


In [None]:
# Append '_Y' to every column label; a new frame is returned and df is unchanged.
df.add_suffix(suffix='_Y')

Unnamed: 0,col_one_Y,col_two_Y
0,100,300
1,200,400


### **3. Reverse row order**

In [None]:
# A step of -1 in the row slice walks the rows from last to first;
# the column slice `:` keeps every column.
df.loc[::-1, :].head()

In [None]:
# The reversed frame keeps its original index labels. To renumber the rows
# from zero, reset the index and drop the old labels.
(
    df.loc[::-1]
    .reset_index(drop=True)
    .head()
)

### **4. Reverse column order**

In [None]:
# In .loc[rows, columns] the part before the comma selects rows and the part
# after it selects columns, so `:, ::-1` keeps all rows and reverses the columns.
df.loc[:, ::-1].head()

### **5. Select columns by data type**

In [None]:
# Keep only the numeric and object columns; several dtypes can be listed at once.
df.select_dtypes(include=['number', 'object']).head()

In [None]:
# The inverse selection: drop every numeric column and keep whatever remains.
df.select_dtypes(exclude='number')

### **6. Convert strings to numbers**

In [None]:
# Example frame whose values are all stored as strings.
# The '-' in col_three is deliberately not a valid number, so a plain
# float conversion of that column will fail.
df = pd.DataFrame(
    data={
        'col_one': ['1.1', '2.2', '3.3'],
        'col_two': ['4.4', '5.5', '6.6'],
        'col_three': ['7.7', '8.8', '-'],
    }
)
df

Unnamed: 0,col_one,col_two,col_three
0,1.1,4.4,7.7
1,2.2,5.5,8.8
2,3.3,6.6,-


In [None]:
# Every column currently holds strings, so each dtype is reported as object.
df.dtypes

col_one      object
col_two      object
col_three    object
dtype: object

In [None]:
# astype() accepts a {column: dtype} mapping, so only the listed columns are
# converted; col_three is left out here and keeps its original dtype.
df.astype(dict.fromkeys(['col_one', 'col_two'], 'float')).dtypes

col_one      float64
col_two      float64
col_three     object
dtype: object

In [None]:
# astype('float') would raise on col_three because '-' is not a valid number.
# pd.to_numeric with errors='coerce' converts such invalid entries to NaN instead.
pd.to_numeric(df['col_three'], errors='coerce')

0    7.7
1    8.8
2    NaN
Name: col_three, dtype: float64

In [None]:
# Same conversion, then replace the resulting NaN values with zeros.
pd.to_numeric(df['col_three'], errors='coerce').fillna(0)

0    7.7
1    8.8
2    0.0
Name: col_three, dtype: float64

In [None]:
# Run the same coerce-and-fill conversion on every column at once; extra
# keyword arguments given to apply() are forwarded to pd.to_numeric.
df = (
    df.apply(pd.to_numeric, errors='coerce')
    .fillna(0)
)
df

Unnamed: 0,col_one,col_two,col_three
0,1.1,4.4,7.7
1,2.2,5.5,8.8
2,3.3,6.6,0.0


In [None]:
# After the conversion above, all three columns are float64.
df.dtypes

col_one      float64
col_two      float64
col_three    float64
dtype: object

### **7. Reduce DataFrame size**

In [None]:
# Read only the columns we need: `usecols` makes pandas skip every other
# column while parsing, so the resulting frame uses less memory.
# NOTE(review): the original cell passed an empty path (''), which makes
# read_csv fail before any data is read. The source below is presumed to be
# the "drinks by country" dataset this example appears to be based on, whose
# column is named 'beer_servings' (plural) — confirm both the location and
# the column names against your copy of the data.
DRINKS_CSV = 'https://raw.githubusercontent.com/justmarkham/pandas-videos/master/data/drinks.csv'
cols = ['beer_servings', 'continent']
small_drinks = pd.read_csv(DRINKS_CSV, usecols=cols)
# info() prints the dtypes and the memory footprint of the reduced frame.
small_drinks.info()
