# Pandas: 
#### Pandas is an opensource data analysis library written in Python. leverages the power and speed of numpy to make data analysis and preprocessing easy for data scientist. It provides rich and highly robust data operations. Numpy written in C.

## Data structure in Pandas

#### Pandas has two main data structures that make working with data easy and intuitive:

### Series:

- A Series is like a list of values, but each value has a label, called an "index."
- Similar to single column of data with labeled rows.
- You can access values by their labels, making data retrieval quick and readable.


In [214]:
import pandas as pd

# Creating a Series
series = pd.Series([10, 20, 30], index=['A', 'B', 'C'])
print(type(series))
series


<class 'pandas.core.series.Series'>


A    10
B    20
C    30
dtype: int64

In [215]:
pd.__version__

'2.2.3'

### DataFrame:

- A DataFrame is like a table with rows and columns, where each column is a Series.
- It can hold data of different types (e.g., text, numbers) in each column.
- DataFrames have both row and column labels, which makes it easier to access and manipulate data based on these labels.

In [216]:
import pandas as pd

# Creating a DataFrame from a dictionary
data = {
    'Name': ['Navin', 'Sam', 'Charlie'],
    'Age': [32, 30, 35],
    'City': ['New York', 'Los Angeles', 'Chicago']
}
print(type(data))
df = pd.DataFrame(data)
df


<class 'dict'>


Unnamed: 0,Name,Age,City
0,Navin,32,New York
1,Sam,30,Los Angeles
2,Charlie,35,Chicago


In [217]:
type(df)

pandas.core.frame.DataFrame

In [218]:
# tells data types of each column
df.dtypes


Name    object
Age      int64
City    object
dtype: object

### Creating a large dataframe

In [219]:
import numpy as np
df1=pd.DataFrame(np.random.randn(1110,10),columns=['A','B','C','D','E','F','G','H','I','J'],index=[np.arange(1110)])

# Multiplying each value by 100
df1 = df1 * 100
df1.head()

Unnamed: 0,A,B,C,D,E,F,G,H,I,J
0,-10.889739,-20.408553,5.739614,-33.653061,-41.350859,170.574877,19.873322,92.579591,-100.991916,-37.779986
1,9.472594,-34.800922,-38.007847,-57.851296,-17.913485,-68.681942,-38.030156,121.895336,-78.779295,23.541364
2,61.43776,2.447014,75.063139,134.234852,-3.351666,-111.701472,-108.449985,-159.148683,-99.648407,43.055682
3,173.70306,60.080724,3.226901,-54.797864,-4.493914,101.018042,144.013973,98.852963,69.177349,-23.450221
4,242.998186,181.925193,60.823145,-66.537097,-24.804575,22.800079,-225.800602,-105.461087,92.369243,94.329291


In [220]:
df1.dtypes

A    float64
B    float64
C    float64
D    float64
E    float64
F    float64
G    float64
H    float64
I    float64
J    float64
dtype: object

In [221]:
#changing the data type of column A to Object from float
df1.at[0,'A']="gaurav"
df1.dtypes

  df1.at[0,'A']="gaurav"


A     object
B    float64
C    float64
D    float64
E    float64
F    float64
G    float64
H    float64
I    float64
J    float64
dtype: object

In [222]:
df1.head()

Unnamed: 0,A,B,C,D,E,F,G,H,I,J
0,gaurav,-20.408553,5.739614,-33.653061,-41.350859,170.574877,19.873322,92.579591,-100.991916,-37.779986
1,9.472594,-34.800922,-38.007847,-57.851296,-17.913485,-68.681942,-38.030156,121.895336,-78.779295,23.541364
2,61.43776,2.447014,75.063139,134.234852,-3.351666,-111.701472,-108.449985,-159.148683,-99.648407,43.055682
3,173.70306,60.080724,3.226901,-54.797864,-4.493914,101.018042,144.013973,98.852963,69.177349,-23.450221
4,242.998186,181.925193,60.823145,-66.537097,-24.804575,22.800079,-225.800602,-105.461087,92.369243,94.329291


In [223]:
df1.index

MultiIndex([(   0,),
            (   1,),
            (   2,),
            (   3,),
            (   4,),
            (   5,),
            (   6,),
            (   7,),
            (   8,),
            (   9,),
            ...
            (1100,),
            (1101,),
            (1102,),
            (1103,),
            (1104,),
            (1105,),
            (1106,),
            (1107,),
            (1108,),
            (1109,)],
           length=1110)

In [224]:
df1 = df1.reset_index(drop=True)
df1.index

RangeIndex(start=0, stop=1110, step=1)

In [225]:
df1.columns

Index(['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J'], dtype='object')

In [226]:
#to convert dataframe into numpy array
df1.to_numpy()

array([['gaurav', -20.408553458911502, 5.739614452123125, ...,
        92.5795910394813, -100.99191633250388, -37.779985991385374],
       [9.472593950644526, -34.8009224061383, -38.00784672658554, ...,
        121.89533597039355, -78.77929533091225, 23.541363701279117],
       [61.437759785070334, 2.447013506112617, 75.06313861824826, ...,
        -159.14868346146358, -99.64840735241077, 43.055681640529706],
       ...,
       [64.59215663990769, -38.39963318775222, 34.79091723945157, ...,
        43.52916910636213, -82.11509549580033, -181.52474692805137],
       [-3.213452750077557, -117.62128016399768, 35.0639046875964, ...,
        11.278956613162302, -78.81896957006143, 42.691766762307054],
       [-11.70356472672181, -49.50233867834682, -225.63757835298247, ...,
        111.18400688805939, 1.6131981530855988, 107.72955453941296]],
      dtype=object)

In [227]:
#Another Example
dict_new={
    "name":['Gaurav','Navin Sir','Harsh Bhaiya','Sushil'],
    "marks":[96,98,85,88],
    "city":['Gaya','Bengaluru','Jodhpur','Bikaner']
    
}

In [228]:
dframe=pd.DataFrame(dict_new)

In [229]:
dframe

Unnamed: 0,name,marks,city
0,Gaurav,96,Gaya
1,Navin Sir,98,Bengaluru
2,Harsh Bhaiya,85,Jodhpur
3,Sushil,88,Bikaner


In [230]:
#we can convert this data frame to any file format. Let's say we want to convert it into .csv
#index means row in general and column ko column hi bolte hain.
dframe.to_csv('office.csv',index=False)

In [231]:
dframe.head(2)

Unnamed: 0,name,marks,city
0,Gaurav,96,Gaya
1,Navin Sir,98,Bengaluru


In [232]:
dframe.tail(2)

Unnamed: 0,name,marks,city
2,Harsh Bhaiya,85,Jodhpur
3,Sushil,88,Bikaner


In [233]:
#it does a numerical analysis
dframe.describe()

Unnamed: 0,marks
count,4.0
mean,91.75
std,6.238322
min,85.0
25%,87.25
50%,92.0
75%,96.5
max,98.0


In [234]:
#to read a csv file
flight_details=pd.read_csv('flight_details.csv');

In [235]:
flight_details

Unnamed: 0,flight_no,price,destination
0,GS1234,9600,Gaya
1,NR1234,9800,Bengaluru
2,HB1234,8500,Jodhpur
3,SR1234,8800,Bikaner


In [236]:
flight_details['price']

0    9600
1    9800
2    8500
3    8800
Name: price, dtype: int64

In [237]:
flight_details['price'][2]=10000

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  flight_details['price'][2]=10000


In [238]:
flight_details

Unnamed: 0,flight_no,price,destination
0,GS1234,9600,Gaya
1,NR1234,9800,Bengaluru
2,HB1234,10000,Jodhpur
3,SR1234,8800,Bikaner


### to change the index of data frame

In [239]:
flight_details.index=['first','second','third','fourth']

In [240]:
flight_details

Unnamed: 0,flight_no,price,destination
first,GS1234,9600,Gaya
second,NR1234,9800,Bengaluru
third,HB1234,10000,Jodhpur
fourth,SR1234,8800,Bikaner


# Pandas Attributes

In [241]:
#transpose
df.T

Unnamed: 0,0,1,2
Name,Navin,Sam,Charlie
Age,32,30,35
City,New York,Los Angeles,Chicago


In [242]:
# sort index in backward manner
#axis 0 is index and axis 1 is column
df1.sort_index(axis=0,ascending=False)

Unnamed: 0,A,B,C,D,E,F,G,H,I,J
1109,-11.703565,-49.502339,-225.637578,-70.812613,111.733345,18.110378,60.935992,111.184007,1.613198,107.729555
1108,-3.213453,-117.621280,35.063905,133.275948,193.120050,-224.619881,-100.259631,11.278957,-78.818970,42.691767
1107,64.592157,-38.399633,34.790917,-111.129662,65.244396,32.763210,122.639716,43.529169,-82.115095,-181.524747
1106,-34.012321,-42.103241,-84.189662,-57.361026,-82.083183,89.022737,-30.106020,-15.685506,-0.893197,-145.496660
1105,-227.493172,-76.166076,39.261461,-103.211214,77.589157,-43.375846,42.939319,-68.730090,53.995769,-119.550241
...,...,...,...,...,...,...,...,...,...,...
4,242.998186,181.925193,60.823145,-66.537097,-24.804575,22.800079,-225.800602,-105.461087,92.369243,94.329291
3,173.70306,60.080724,3.226901,-54.797864,-4.493914,101.018042,144.013973,98.852963,69.177349,-23.450221
2,61.43776,2.447014,75.063139,134.234852,-3.351666,-111.701472,-108.449985,-159.148683,-99.648407,43.055682
1,9.472594,-34.800922,-38.007847,-57.851296,-17.913485,-68.681942,-38.030156,121.895336,-78.779295,23.541364


In [243]:
df1['A']

0           gaurav
1         9.472594
2         61.43776
3        173.70306
4       242.998186
           ...    
1105   -227.493172
1106    -34.012321
1107     64.592157
1108     -3.213453
1109    -11.703565
Name: A, Length: 1110, dtype: object

In [244]:
type(df1['A'])

pandas.core.series.Series

## View and Copy

In [245]:
#View
df2=df1
df2.at[0,'A']="Sharma"
df1
#you can see, changing df2 changes df1 as df2 is only a view of df1. so changes in df2 will lead to changes in df1.

Unnamed: 0,A,B,C,D,E,F,G,H,I,J
0,Sharma,-20.408553,5.739614,-33.653061,-41.350859,170.574877,19.873322,92.579591,-100.991916,-37.779986
1,9.472594,-34.800922,-38.007847,-57.851296,-17.913485,-68.681942,-38.030156,121.895336,-78.779295,23.541364
2,61.43776,2.447014,75.063139,134.234852,-3.351666,-111.701472,-108.449985,-159.148683,-99.648407,43.055682
3,173.70306,60.080724,3.226901,-54.797864,-4.493914,101.018042,144.013973,98.852963,69.177349,-23.450221
4,242.998186,181.925193,60.823145,-66.537097,-24.804575,22.800079,-225.800602,-105.461087,92.369243,94.329291
...,...,...,...,...,...,...,...,...,...,...
1105,-227.493172,-76.166076,39.261461,-103.211214,77.589157,-43.375846,42.939319,-68.730090,53.995769,-119.550241
1106,-34.012321,-42.103241,-84.189662,-57.361026,-82.083183,89.022737,-30.106020,-15.685506,-0.893197,-145.496660
1107,64.592157,-38.399633,34.790917,-111.129662,65.244396,32.763210,122.639716,43.529169,-82.115095,-181.524747
1108,-3.213453,-117.621280,35.063905,133.275948,193.120050,-224.619881,-100.259631,11.278957,-78.818970,42.691767


In [246]:
#Copy
df3=df1.copy()
df3.at[0,'A']="Java"
df1
#one can see that changes in the copied dataframe df3 does not affect df1.

Unnamed: 0,A,B,C,D,E,F,G,H,I,J
0,Sharma,-20.408553,5.739614,-33.653061,-41.350859,170.574877,19.873322,92.579591,-100.991916,-37.779986
1,9.472594,-34.800922,-38.007847,-57.851296,-17.913485,-68.681942,-38.030156,121.895336,-78.779295,23.541364
2,61.43776,2.447014,75.063139,134.234852,-3.351666,-111.701472,-108.449985,-159.148683,-99.648407,43.055682
3,173.70306,60.080724,3.226901,-54.797864,-4.493914,101.018042,144.013973,98.852963,69.177349,-23.450221
4,242.998186,181.925193,60.823145,-66.537097,-24.804575,22.800079,-225.800602,-105.461087,92.369243,94.329291
...,...,...,...,...,...,...,...,...,...,...
1105,-227.493172,-76.166076,39.261461,-103.211214,77.589157,-43.375846,42.939319,-68.730090,53.995769,-119.550241
1106,-34.012321,-42.103241,-84.189662,-57.361026,-82.083183,89.022737,-30.106020,-15.685506,-0.893197,-145.496660
1107,64.592157,-38.399633,34.790917,-111.129662,65.244396,32.763210,122.639716,43.529169,-82.115095,-181.524747
1108,-3.213453,-117.621280,35.063905,133.275948,193.120050,-224.619881,-100.259631,11.278957,-78.818970,42.691767


In [247]:
df3

Unnamed: 0,A,B,C,D,E,F,G,H,I,J
0,Java,-20.408553,5.739614,-33.653061,-41.350859,170.574877,19.873322,92.579591,-100.991916,-37.779986
1,9.472594,-34.800922,-38.007847,-57.851296,-17.913485,-68.681942,-38.030156,121.895336,-78.779295,23.541364
2,61.43776,2.447014,75.063139,134.234852,-3.351666,-111.701472,-108.449985,-159.148683,-99.648407,43.055682
3,173.70306,60.080724,3.226901,-54.797864,-4.493914,101.018042,144.013973,98.852963,69.177349,-23.450221
4,242.998186,181.925193,60.823145,-66.537097,-24.804575,22.800079,-225.800602,-105.461087,92.369243,94.329291
...,...,...,...,...,...,...,...,...,...,...
1105,-227.493172,-76.166076,39.261461,-103.211214,77.589157,-43.375846,42.939319,-68.730090,53.995769,-119.550241
1106,-34.012321,-42.103241,-84.189662,-57.361026,-82.083183,89.022737,-30.106020,-15.685506,-0.893197,-145.496660
1107,64.592157,-38.399633,34.790917,-111.129662,65.244396,32.763210,122.639716,43.529169,-82.115095,-181.524747
1108,-3.213453,-117.621280,35.063905,133.275948,193.120050,-224.619881,-100.259631,11.278957,-78.818970,42.691767


## Deep and Shallow Copy in Pandas


### Shallow Copy
#### A shallow copy creates a new DataFrame object, but it still references the original data. This means that some modifications to the shallow copy may affect the original DataFrame as both share the same data in memory.

 a shallow copy can be done using df4 = df1[:]

In [248]:
#shallow copy
df4=df1[:]
df4

Unnamed: 0,A,B,C,D,E,F,G,H,I,J
0,Sharma,-20.408553,5.739614,-33.653061,-41.350859,170.574877,19.873322,92.579591,-100.991916,-37.779986
1,9.472594,-34.800922,-38.007847,-57.851296,-17.913485,-68.681942,-38.030156,121.895336,-78.779295,23.541364
2,61.43776,2.447014,75.063139,134.234852,-3.351666,-111.701472,-108.449985,-159.148683,-99.648407,43.055682
3,173.70306,60.080724,3.226901,-54.797864,-4.493914,101.018042,144.013973,98.852963,69.177349,-23.450221
4,242.998186,181.925193,60.823145,-66.537097,-24.804575,22.800079,-225.800602,-105.461087,92.369243,94.329291
...,...,...,...,...,...,...,...,...,...,...
1105,-227.493172,-76.166076,39.261461,-103.211214,77.589157,-43.375846,42.939319,-68.730090,53.995769,-119.550241
1106,-34.012321,-42.103241,-84.189662,-57.361026,-82.083183,89.022737,-30.106020,-15.685506,-0.893197,-145.496660
1107,64.592157,-38.399633,34.790917,-111.129662,65.244396,32.763210,122.639716,43.529169,-82.115095,-181.524747
1108,-3.213453,-117.621280,35.063905,133.275948,193.120050,-224.619881,-100.259631,11.278957,-78.818970,42.691767


### Deep Copy
#### A deep copy creates a new DataFrame object with entirely independent data. Changes made to a deep copy won’t affect the original DataFrame, as it has a separate memory allocation.

In [249]:
#deep copy
df5 = df1.copy()
df5

Unnamed: 0,A,B,C,D,E,F,G,H,I,J
0,Sharma,-20.408553,5.739614,-33.653061,-41.350859,170.574877,19.873322,92.579591,-100.991916,-37.779986
1,9.472594,-34.800922,-38.007847,-57.851296,-17.913485,-68.681942,-38.030156,121.895336,-78.779295,23.541364
2,61.43776,2.447014,75.063139,134.234852,-3.351666,-111.701472,-108.449985,-159.148683,-99.648407,43.055682
3,173.70306,60.080724,3.226901,-54.797864,-4.493914,101.018042,144.013973,98.852963,69.177349,-23.450221
4,242.998186,181.925193,60.823145,-66.537097,-24.804575,22.800079,-225.800602,-105.461087,92.369243,94.329291
...,...,...,...,...,...,...,...,...,...,...
1105,-227.493172,-76.166076,39.261461,-103.211214,77.589157,-43.375846,42.939319,-68.730090,53.995769,-119.550241
1106,-34.012321,-42.103241,-84.189662,-57.361026,-82.083183,89.022737,-30.106020,-15.685506,-0.893197,-145.496660
1107,64.592157,-38.399633,34.790917,-111.129662,65.244396,32.763210,122.639716,43.529169,-82.115095,-181.524747
1108,-3.213453,-117.621280,35.063905,133.275948,193.120050,-224.619881,-100.259631,11.278957,-78.818970,42.691767
