# Pandas: 
#### Pandas is an opensource data analysis library written in Python. leverages the power and speed of numpy to make data analysis and preprocessing easy for data scientist. It provides rich and highly robust data operations. Numpy written in C.

## Data structure in Pandas

#### Pandas has two main data structures that make working with data easy and intuitive:

### Series:

- A Series is like a list of values, but each value has a label, called an "index."
- Similar to single column of data with labeled rows.
- You can access values by their labels, making data retrieval quick and readable.


In [187]:
import pandas as pd

# Creating a Series
series = pd.Series([10, 20, 30], index=['A', 'B', 'C'])
print(type(series))
series


<class 'pandas.core.series.Series'>


A    10
B    20
C    30
dtype: int64

In [188]:
pd.__version__

'2.2.3'

### DataFrame:

- A DataFrame is like a table with rows and columns, where each column is a Series.
- It can hold data of different types (e.g., text, numbers) in each column.
- DataFrames have both row and column labels, which makes it easier to access and manipulate data based on these labels.

In [189]:
import pandas as pd

# Creating a DataFrame from a dictionary
data = {
    'Name': ['Navin', 'Sam', 'Charlie'],
    'Age': [32, 30, 35],
    'City': ['New York', 'Los Angeles', 'Chicago']
}
print(type(data))
df = pd.DataFrame(data)
df


<class 'dict'>


Unnamed: 0,Name,Age,City
0,Navin,32,New York
1,Sam,30,Los Angeles
2,Charlie,35,Chicago


In [190]:
type(df)

pandas.core.frame.DataFrame

In [191]:
# tells data types of each column
df.dtypes


Name    object
Age      int64
City    object
dtype: object

### Creating a large dataframe

In [192]:
import numpy as np
df1=pd.DataFrame(np.random.randn(1110,10),columns=['A','B','C','D','E','F','G','H','I','J'],index=[np.arange(1110)])

# Multiplying each value by 100
df1 = df1 * 100
df1.head()

Unnamed: 0,A,B,C,D,E,F,G,H,I,J
0,-49.5503,94.033351,51.801003,6.302811,-193.876938,-203.353006,182.765482,36.082115,181.613638,-85.116671
1,-134.167892,47.61798,86.874029,110.635254,149.527943,61.302466,-32.897669,-13.177029,16.297715,-53.357741
2,-104.877785,-43.633596,59.167541,-67.685991,-107.514726,33.605,15.253928,39.043326,108.130387,56.759205
3,-55.401966,-158.611824,-24.752201,-207.463013,31.573769,15.39159,174.776584,-4.560032,-134.683225,241.276564
4,-62.11235,-156.129359,-28.835745,36.540755,-127.615464,1.061238,19.534054,-105.777215,-92.797891,-135.763248


In [193]:
df1.dtypes

A    float64
B    float64
C    float64
D    float64
E    float64
F    float64
G    float64
H    float64
I    float64
J    float64
dtype: object

In [194]:
#changing the data type of column A to Object from float
df1.at[0,'A']="gaurav"
df1.dtypes

  df1.at[0,'A']="gaurav"


A     object
B    float64
C    float64
D    float64
E    float64
F    float64
G    float64
H    float64
I    float64
J    float64
dtype: object

In [195]:
df1.head()

Unnamed: 0,A,B,C,D,E,F,G,H,I,J
0,gaurav,94.033351,51.801003,6.302811,-193.876938,-203.353006,182.765482,36.082115,181.613638,-85.116671
1,-134.167892,47.61798,86.874029,110.635254,149.527943,61.302466,-32.897669,-13.177029,16.297715,-53.357741
2,-104.877785,-43.633596,59.167541,-67.685991,-107.514726,33.605,15.253928,39.043326,108.130387,56.759205
3,-55.401966,-158.611824,-24.752201,-207.463013,31.573769,15.39159,174.776584,-4.560032,-134.683225,241.276564
4,-62.11235,-156.129359,-28.835745,36.540755,-127.615464,1.061238,19.534054,-105.777215,-92.797891,-135.763248


In [196]:
df1.index

MultiIndex([(   0,),
            (   1,),
            (   2,),
            (   3,),
            (   4,),
            (   5,),
            (   6,),
            (   7,),
            (   8,),
            (   9,),
            ...
            (1100,),
            (1101,),
            (1102,),
            (1103,),
            (1104,),
            (1105,),
            (1106,),
            (1107,),
            (1108,),
            (1109,)],
           length=1110)

In [197]:
df1 = df1.reset_index(drop=True)
df1.index

RangeIndex(start=0, stop=1110, step=1)

In [198]:
df1.columns

Index(['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J'], dtype='object')

In [199]:
#to convert dataframe into numpy array
df1.to_numpy()

array([['gaurav', 94.03335123818556, 51.80100271517086, ...,
        36.08211456470777, 181.61363757100295, -85.11667134012782],
       [-134.1678921206389, 47.61798027251673, 86.87402935639817, ...,
        -13.177028867640553, 16.29771537007029, -53.35774118441458],
       [-104.87778524931856, -43.63359581880376, 59.16754058029133, ...,
        39.043326011586984, 108.13038698513652, 56.75920468413991],
       ...,
       [86.5593079841271, 42.47579357104807, 15.698198618537122, ...,
        -71.66328341436831, -32.76676719603097, 184.07904118156958],
       [193.13664287253428, -28.438763888997563, -8.522000105585626, ...,
        120.80873087034793, 49.59221357821795, -32.76982874382773],
       [-22.61375552056046, -13.434235906916737, -90.34273625675851, ...,
        75.36426964251288, -51.59480970195192, -4.4058962384444245]],
      dtype=object)

In [200]:
#Another Example
dict_new={
    "name":['Gaurav','Navin Sir','Harsh Bhaiya','Sushil'],
    "marks":[96,98,85,88],
    "city":['Gaya','Bengaluru','Jodhpur','Bikaner']
    
}

In [201]:
dframe=pd.DataFrame(dict_new)

In [202]:
dframe

Unnamed: 0,name,marks,city
0,Gaurav,96,Gaya
1,Navin Sir,98,Bengaluru
2,Harsh Bhaiya,85,Jodhpur
3,Sushil,88,Bikaner


In [203]:
#we can convert this data frame to any file format. Let's say we want to convert it into .csv
#index means row in general and column ko column hi bolte hain.
dframe.to_csv('office.csv',index=False)

In [204]:
dframe.head(2)

Unnamed: 0,name,marks,city
0,Gaurav,96,Gaya
1,Navin Sir,98,Bengaluru


In [205]:
dframe.tail(2)

Unnamed: 0,name,marks,city
2,Harsh Bhaiya,85,Jodhpur
3,Sushil,88,Bikaner


In [206]:
#it does a numerical analysis
dframe.describe()

Unnamed: 0,marks
count,4.0
mean,91.75
std,6.238322
min,85.0
25%,87.25
50%,92.0
75%,96.5
max,98.0


In [207]:
#to read a csv file
flight_details=pd.read_csv('flight_details.csv');

In [208]:
flight_details

Unnamed: 0,flight_no,price,destination
0,GS1234,9600,Gaya
1,NR1234,9800,Bengaluru
2,HB1234,8500,Jodhpur
3,SR1234,8800,Bikaner


In [209]:
flight_details['price']

0    9600
1    9800
2    8500
3    8800
Name: price, dtype: int64

In [210]:
flight_details['price'][2]=10000

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  flight_details['price'][2]=10000


In [211]:
flight_details

Unnamed: 0,flight_no,price,destination
0,GS1234,9600,Gaya
1,NR1234,9800,Bengaluru
2,HB1234,10000,Jodhpur
3,SR1234,8800,Bikaner


### to change the index of data frame

In [212]:
flight_details.index=['first','second','third','fourth']

In [213]:
flight_details

Unnamed: 0,flight_no,price,destination
first,GS1234,9600,Gaya
second,NR1234,9800,Bengaluru
third,HB1234,10000,Jodhpur
fourth,SR1234,8800,Bikaner
