# Pandas Tips and Tricks

## 01- How to find the version

In [2]:
import pandas as pd
pd.__version__

'1.3.4'

In [3]:
# Another way
pd.show_versions()


INSTALLED VERSIONS
------------------
commit           : 945c9ed766a61c7d2c0a7cbb251b6edebf9cb7d5
python           : 3.9.7.final.0
python-bits      : 64
OS               : Windows
OS-release       : 10
Version          : 10.0.19044
machine          : AMD64
processor        : Intel64 Family 6 Model 69 Stepping 1, GenuineIntel
byteorder        : little
LC_ALL           : None
LANG             : None
LOCALE           : English_Pakistan.1252

pandas           : 1.3.4
numpy            : 1.20.3
pytz             : 2021.3
dateutil         : 2.8.2
pip              : 21.2.4
setuptools       : 58.0.4
Cython           : 0.29.24
pytest           : 6.2.4
hypothesis       : None
sphinx           : 4.2.0
blosc            : None
feather          : None
xlsxwriter       : 3.0.1
lxml.etree       : 4.6.3
html5lib         : 1.1
pymysql          : None
psycopg2         : None
jinja2           : 2.11.3
IPython          : 7.29.0
pandas_datareader: None
bs4              : 4.10.0
bottleneck       : 1.3.2
fsspe

## 02- Make a DataFrame

In [6]:
# Method 1
df = pd.DataFrame({
    'A Col': [1,2,3,7,8],
    'B Col': [4,5,6,34,65]
})
df.head()

Unnamed: 0,A Col,B Col
0,1,4
1,2,5
2,3,6
3,7,34
4,8,65


In [7]:
# Method 2
import numpy as np
arr = np.array([[1,2,3], [4,5,6], [7,8,9]])
pd.DataFrame(arr)

Unnamed: 0,0,1,2
0,1,2,3
1,4,5,6
2,7,8,9


In [8]:
# Method 3
pd.DataFrame(np.random.rand(4,8))

Unnamed: 0,0,1,2,3,4,5,6,7
0,0.488048,0.770141,0.765664,0.625191,0.892091,0.862631,0.034702,0.51817
1,0.563278,0.890842,0.700192,0.781058,0.462584,0.267907,0.743106,0.322178
2,0.801488,0.477853,0.412481,0.034185,0.214171,0.243537,0.92638,0.061184
3,0.982756,0.682144,0.855883,0.730309,0.618276,0.294079,0.071743,0.726146


In [12]:
pd.DataFrame(np.random.rand(4,9), columns=list('ABCDEFGHI'))

Unnamed: 0,A,B,C,D,E,F,G,H,I
0,0.59404,0.623014,0.486618,0.023301,0.952429,0.720524,0.431436,0.859672,0.963372
1,0.361578,0.537713,0.115563,0.917117,0.610998,0.279676,0.77081,0.04988,0.007337
2,0.629429,0.277316,0.592571,0.427499,0.303611,0.015153,0.195229,0.158953,0.439646
3,0.916204,0.499202,0.307077,0.809933,0.105842,0.365776,0.529024,0.549203,0.230314


## 03- How to Rename Columns

In [13]:
df = pd.DataFrame({
    'A Col': [1,2,3,7,8],
    'B Col': [4,5,6,34,65]
})
df.head()

Unnamed: 0,A Col,B Col
0,1,4
1,2,5
2,3,6
3,7,34
4,8,65


In [15]:
# Method 1
df.rename(columns={'A Col': 'Col_A', 'B Col': 'Col_B'}, inplace=True)
df.head()

Unnamed: 0,Col_A,Col_B
0,1,4
1,2,5
2,3,6
3,7,34
4,8,65


In [16]:
# Method 2
df.columns = ['col_aa', 'col_bb']
df.head()

Unnamed: 0,col_aa,col_bb
0,1,4
1,2,5
2,3,6
3,7,34
4,8,65


In [18]:
# Rename any Specific character
df.columns = df.columns.str.replace('_', '*')
df.head()

Unnamed: 0,col*aa,col*bb
0,1,4
1,2,5
2,3,6
3,7,34
4,8,65


In [21]:
# Adding prefix to columns
df = df.add_prefix('baba_')
df.head()

Unnamed: 0,baba_col*aa,baba_col*bb
0,1,4
1,2,5
2,3,6
3,7,34
4,8,65


In [22]:
# Adding suffix to columns
df = df.add_suffix('haha')
df.head()

Unnamed: 0,baba_col*aahaha,baba_col*bbhaha
0,1,4
1,2,5
2,3,6
3,7,34
4,8,65


In [23]:
df.columns = ['col_a', 'col_b']
df.head()

Unnamed: 0,col_a,col_b
0,1,4
1,2,5
2,3,6
3,7,34
4,8,65


## 04- Using Template Data

In [31]:
# import libraries
import pandas as pd
import numpy as np
import seaborn as sns

# import dataset
df = sns.load_dataset('tips')
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [25]:
# Summary of Data
df.describe()

Unnamed: 0,total_bill,tip,size
count,244.0,244.0,244.0
mean,19.785943,2.998279,2.569672
std,8.902412,1.383638,0.9511
min,3.07,1.0,1.0
25%,13.3475,2.0,2.0
50%,17.795,2.9,2.0
75%,24.1275,3.5625,3.0
max,50.81,10.0,6.0


In [28]:
# Saving Dataset
df.to_csv('tips.csv')
df.to_excel('tips.xlsx')

## 05- Using Your own Data

In [33]:
# import dataset from local drive
df = pd.read_csv('tips.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,0,16.99,1.01,Female,No,Sun,Dinner,2
1,1,10.34,1.66,Male,No,Sun,Dinner,3
2,2,21.01,3.5,Male,No,Sun,Dinner,3
3,3,23.68,3.31,Male,No,Sun,Dinner,2
4,4,24.59,3.61,Female,No,Sun,Dinner,4


## 06- 