# Let's see some basic uses of pandas and NumPy
### First of all, import pandas and NumPy

In [1]:
import pandas as pd
import numpy as np

# Let's do some basic stuff

In [2]:
# Build a Series whose elements have different types (integers, a string and a missing value).
# np.nan marks a missing / blank entry in a Series or DataFrame: it is not the same thing
# as zero or a dash, and it is displayed as NaN.
# Because strings and numbers are mixed, the resulting dtype is `object`.
pd.Series([1,2,3,'Parsa',0,np.nan,-1])

0        1
1        2
2        3
3    Parsa
4        0
5      NaN
6       -1
dtype: object

In [3]:
# Build a DatetimeIndex of consecutive calendar days (nothing random here).
#   start   -> the first date: year 2025, month 01, day 01
#   periods -> how many dates to generate, counting forward from the start
#   freq    -> step between dates; 'D' means one day
dates = pd.date_range(start='20250101', periods=6, freq='D')
dates

DatetimeIndex(['2025-01-01', '2025-01-02', '2025-01-03', '2025-01-04',
               '2025-01-05', '2025-01-06'],
              dtype='datetime64[ns]', freq='D')

In [11]:
# Draw an array of random numbers with 6 rows and 4 columns.
# np.random.rand would give random numbers between 0 and 1, whereas
# np.random.randn samples from a normal distribution centred on 0: most values
# land close to 0, and a value as extreme as 1000 is theoretically possible
# but vanishingly unlikely.
np.random.randn(6,4)

array([[-0.0214201 , -1.46014435, -0.55836237, -0.48670668],
       [ 1.4498629 , -0.85415785, -0.35021724,  0.12056385],
       [ 0.01211037, -0.50630991,  1.32093909, -0.68260177],
       [-0.38439388, -0.12788343,  0.13781082,  0.41748665],
       [-0.28983157,  0.06640906, -1.29244868,  0.98423694],
       [ 0.31123837, -0.72158816,  1.51911769, -0.7513007 ]])

In [14]:
# General form: pd.DataFrame(data, index=..., columns=...)
#   data    -> the values used to fill the frame (here a 6x4 array of random numbers)
#   index   -> the row labels (here the six dates built earlier)
#   columns -> the column labels
Data_Frame = pd.DataFrame(
    data=np.random.randn(6, 4),
    index=dates,
    columns=list('ABCD'),
)

In [15]:
# Leaving the frame as the last expression of a cell displays it (all 6 rows here)
Data_Frame

Unnamed: 0,A,B,C,D
2025-01-01,-2.094681,1.484954,0.34257,-0.608132
2025-01-02,2.198727,-0.457548,2.184116,-0.312555
2025-01-03,0.765703,0.067414,-0.466065,-0.671623
2025-01-04,0.668669,-1.49013,0.174384,-0.024745
2025-01-05,0.288436,-0.892103,-0.248659,-0.705887
2025-01-06,0.010047,0.189019,-0.155464,-1.119271


In [16]:
# List the data type of every column
Data_Frame.dtypes

A    float64
B    float64
C    float64
D    float64
dtype: object

In [18]:
# Select a single column by its label: Data_Frame['name of one column']
# The result is a Series that keeps the frame's date index
Data_Frame['A']

2025-01-01   -2.094681
2025-01-02    2.198727
2025-01-03    0.765703
2025-01-04    0.668669
2025-01-05    0.288436
2025-01-06    0.010047
Freq: D, Name: A, dtype: float64

In [33]:
# head() with no argument shows only the first 5 rows of the data frame
Data_Frame.head()

Unnamed: 0,A,B,C,D
2025-01-01,-2.094681,1.484954,0.34257,-0.608132
2025-01-02,2.198727,-0.457548,2.184116,-0.312555
2025-01-03,0.765703,0.067414,-0.466065,-0.671623
2025-01-04,0.668669,-1.49013,0.174384,-0.024745
2025-01-05,0.288436,-0.892103,-0.248659,-0.705887


In [26]:
# head(n) shows the first n rows of the data frame -- here the first two
Data_Frame.head(2)

Unnamed: 0,A,B,C,D
2025-01-01,-2.094681,1.484954,0.34257,-0.608132
2025-01-02,2.198727,-0.457548,2.184116,-0.312555


In [25]:
# tail(n) shows the last n rows of the data frame -- here the last two
Data_Frame.tail(2)

Unnamed: 0,A,B,C,D
2025-01-05,0.288436,-0.892103,-0.248659,-0.705887
2025-01-06,0.010047,0.189019,-0.155464,-1.119271


In [22]:
# Summary statistics for each column: count, mean, std, min,
# the 25% / 50% / 75% quartiles and max
Data_Frame.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,0.30615,-0.183066,0.305147,-0.573702
std,1.398316,1.026642,0.965676,0.372946
min,-2.094681,-1.49013,-0.466065,-1.119271
25%,0.079644,-0.783465,-0.22536,-0.697321
50%,0.478553,-0.195067,0.00946,-0.639878
75%,0.741444,0.158618,0.300523,-0.38645
max,2.198727,1.484954,2.184116,-0.024745


In [23]:
# Mean of every column, returned as a Series indexed by column name
Data_Frame.mean()

A    0.306150
B   -0.183066
C    0.305147
D   -0.573702
dtype: float64

In [28]:
# Mean of a single column: the result is one scalar value instead of a Series
Data_Frame['A'].mean()

np.float64(0.3061502403782162)

In [29]:
# Positional slice: rows at positions 0 and 1 (the stop position, 2, is excluded)
Data_Frame.iloc[0:2]

Unnamed: 0,A,B,C,D
2025-01-01,-2.094681,1.484954,0.34257,-0.608132
2025-01-02,2.198727,-0.457548,2.184116,-0.312555


In [30]:
# .T transposes the frame: the dates become the columns and A-D become the rows
Data_Frame.T

Unnamed: 0,2025-01-01,2025-01-02,2025-01-03,2025-01-04,2025-01-05,2025-01-06
A,-2.094681,2.198727,0.765703,0.668669,0.288436,0.010047
B,1.484954,-0.457548,0.067414,-1.49013,-0.892103,0.189019
C,0.34257,2.184116,-0.466065,0.174384,-0.248659,-0.155464
D,-0.608132,-0.312555,-0.671623,-0.024745,-0.705887,-1.119271


In [31]:
# Sort the rows of the data frame by the values in one column, smallest value first
# sort_values(by='name of column')
Data_Frame.sort_values(by='B')

Unnamed: 0,A,B,C,D
2025-01-04,0.668669,-1.49013,0.174384,-0.024745
2025-01-05,0.288436,-0.892103,-0.248659,-0.705887
2025-01-02,2.198727,-0.457548,2.184116,-0.312555
2025-01-03,0.765703,0.067414,-0.466065,-0.671623
2025-01-06,0.010047,0.189019,-0.155464,-1.119271
2025-01-01,-2.094681,1.484954,0.34257,-0.608132


In [32]:
# Boolean filtering: keep only the rows whose value in column 'A' is greater than 0
Data_Frame.loc[Data_Frame['A'] > 0]

Unnamed: 0,A,B,C,D
2025-01-02,2.198727,-0.457548,2.184116,-0.312555
2025-01-03,0.765703,0.067414,-0.466065,-0.671623
2025-01-04,0.668669,-1.49013,0.174384,-0.024745
2025-01-05,0.288436,-0.892103,-0.248659,-0.705887
2025-01-06,0.010047,0.189019,-0.155464,-1.119271


# Read and write

In [None]:
# read_csv is a function of the pandas module (pd), not a method of a data frame;
# no variable called `df` exists in this notebook.
# 'csv_file_name.csv' is a placeholder: replace it with the path of a CSV file that exists.
pd.read_csv('csv_file_name.csv')

In [None]:
# Data_Frame.to_csv("folder/file_name.csv") saves the data frame to that location and name.
# Without a path nothing is saved: to_csv() only returns the CSV text as a string.
# 'csv_file_name.csv' is a placeholder (the same name the read cell uses); replace it as needed.
Data_Frame.to_csv('csv_file_name.csv')

In [None]:
# First install openpyxl with pip; then you can export your data as an Excel file.
# to_excel needs the destination path as its first argument -- calling it with
# no argument raises a TypeError.
# 'excel_file_name.xlsx' is a placeholder; replace it as needed.
Data_Frame.to_excel('excel_file_name.xlsx')