# Pandas Tips and Tricks

## 01- How to find the version

In [1]:
import pandas as pd
pd.__version__

'1.3.4'

In [2]:
# Another way
pd.show_versions()


INSTALLED VERSIONS
------------------
commit           : 945c9ed766a61c7d2c0a7cbb251b6edebf9cb7d5
python           : 3.9.7.final.0
python-bits      : 64
OS               : Windows
OS-release       : 10
Version          : 10.0.19044
machine          : AMD64
processor        : Intel64 Family 6 Model 69 Stepping 1, GenuineIntel
byteorder        : little
LC_ALL           : None
LANG             : None
LOCALE           : English_Pakistan.1252

pandas           : 1.3.4
numpy            : 1.20.3
pytz             : 2021.3
dateutil         : 2.8.2
pip              : 21.2.4
setuptools       : 58.0.4
Cython           : 0.29.24
pytest           : 6.2.4
hypothesis       : None
sphinx           : 4.2.0
blosc            : None
feather          : None
xlsxwriter       : 3.0.1
lxml.etree       : 4.6.3
html5lib         : 1.1
pymysql          : None
psycopg2         : None
jinja2           : 2.11.3
IPython          : 7.29.0
pandas_datareader: None
bs4              : 4.10.0
bottleneck       : 1.3.2
fsspe

## 02- Make a DataFrame

In [3]:
# Method 1
df = pd.DataFrame({
    'A Col': [1,2,3,7,8],
    'B Col': [4,5,6,34,65]
})
df.head()

Unnamed: 0,A Col,B Col
0,1,4
1,2,5
2,3,6
3,7,34
4,8,65


In [4]:
# Method 2
import numpy as np
arr = np.array([[1,2,3], [4,5,6], [7,8,9]])
pd.DataFrame(arr)

Unnamed: 0,0,1,2
0,1,2,3
1,4,5,6
2,7,8,9


In [5]:
# Method 3
pd.DataFrame(np.random.rand(4,8))

Unnamed: 0,0,1,2,3,4,5,6,7
0,0.520696,0.281294,0.898231,0.762289,0.473464,0.246602,0.520594,0.742727
1,0.673608,0.609321,0.439136,0.965792,0.160379,0.292747,0.936239,0.368097
2,0.958488,0.500491,0.675518,0.007869,0.091839,0.216158,0.749521,0.341853
3,0.803434,0.723253,0.092252,0.787798,0.507607,0.783766,0.217691,0.2779


In [6]:
pd.DataFrame(np.random.rand(4,9), columns=list('ABCDEFGHI'))

Unnamed: 0,A,B,C,D,E,F,G,H,I
0,0.550634,0.405515,0.437939,0.106501,0.05519,0.344821,0.741935,0.544415,0.379058
1,0.548297,0.33131,0.210745,0.23013,0.675901,0.923943,0.480303,0.583198,0.749846
2,0.822629,0.532171,0.507738,0.670042,0.380661,0.044877,0.851625,0.135735,0.992915
3,0.032711,0.970907,0.048611,0.382642,0.111289,0.17922,0.725025,0.325443,0.204811


## 03- How to Rename Columns

In [7]:
df = pd.DataFrame({
    'A Col': [1,2,3,7,8],
    'B Col': [4,5,6,34,65]
})
df.head()

Unnamed: 0,A Col,B Col
0,1,4
1,2,5
2,3,6
3,7,34
4,8,65


In [8]:
# Method 1
df.rename(columns={'A Col': 'Col_A', 'B Col': 'Col_B'}, inplace=True)
df.head()

Unnamed: 0,Col_A,Col_B
0,1,4
1,2,5
2,3,6
3,7,34
4,8,65


In [9]:
# Method 2
df.columns = ['col_aa', 'col_bb']
df.head()

Unnamed: 0,col_aa,col_bb
0,1,4
1,2,5
2,3,6
3,7,34
4,8,65


In [10]:
# Rename any Specific character
df.columns = df.columns.str.replace('_', '*')
df.head()

Unnamed: 0,col*aa,col*bb
0,1,4
1,2,5
2,3,6
3,7,34
4,8,65


In [11]:
# Adding prefix to columns
df = df.add_prefix('baba_')
df.head()

Unnamed: 0,baba_col*aa,baba_col*bb
0,1,4
1,2,5
2,3,6
3,7,34
4,8,65


In [12]:
# Adding suffix to columns
df = df.add_suffix('haha')
df.head()

Unnamed: 0,baba_col*aahaha,baba_col*bbhaha
0,1,4
1,2,5
2,3,6
3,7,34
4,8,65


In [13]:
df.columns = ['col_a', 'col_b']
df.head()

Unnamed: 0,col_a,col_b
0,1,4
1,2,5
2,3,6
3,7,34
4,8,65


## 04- Using Template Data

In [14]:
# import libraries
import pandas as pd
import numpy as np
import seaborn as sns

# import dataset
df = sns.load_dataset('tips')
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [15]:
# Summary of Data
df.describe()

Unnamed: 0,total_bill,tip,size
count,244.0,244.0,244.0
mean,19.785943,2.998279,2.569672
std,8.902412,1.383638,0.9511
min,3.07,1.0,1.0
25%,13.3475,2.0,2.0
50%,17.795,2.9,2.0
75%,24.1275,3.5625,3.0
max,50.81,10.0,6.0


In [16]:
# Saving Dataset
df.to_csv('tips.csv')
df.to_excel('tips.xlsx')

## 05- Using Your own Data

In [17]:
# import dataset from local drive
df = pd.read_csv('tips.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,0,16.99,1.01,Female,No,Sun,Dinner,2
1,1,10.34,1.66,Male,No,Sun,Dinner,3
2,2,21.01,3.5,Male,No,Sun,Dinner,3
3,3,23.68,3.31,Male,No,Sun,Dinner,2
4,4,24.59,3.61,Female,No,Sun,Dinner,4


## 06- 