In [38]:
import pandas as pd

In [45]:
california = pd.read_csv('/content/california.csv')

In [46]:
california

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms
0,-122.05,37.37,27.0,3885.0,661.0
1,-118.3,34.26,43.0,1510.0,310.0
2,-117.81,33.78,27.0,3589.0,507.0
3,-118.36,33.82,28.0,67.0,15.0
4,-119.67,36.33,19.0,1241.0,244.0


<br>

**Accessing Data from Dataframe**

In [52]:
# Single columns

california['latitude']

Unnamed: 0,latitude
0,37.37
1,34.26
2,33.78
3,33.82
4,36.33


In [54]:
# Multiple Columns

california[['latitude', 'total_rooms']]

Unnamed: 0,latitude,total_rooms
0,37.37,3885.0
1,34.26,1510.0
2,33.78,3589.0
3,33.82,67.0
4,36.33,1241.0


In [56]:
# Taking data types

california.dtypes

Unnamed: 0,0
longitude,float64
latitude,float64
housing_median_age,float64
total_rooms,float64
total_bedrooms,float64


<br>

**Storing Data**

In [71]:
data = '''id,name,category,description,comments
1,Apple,Fruit,Red and sweet,Good for snacks
2,Banana,Fruit,Yellow and soft,Rich in potassium
3,Carrot,Vegetable,Orange and crunchy,Great for salads
4,Desk,Furniture,Wooden study desk,Needs assembly
5,Chair,Furniture,Comfortable office chair,Ergonomic'''

In [76]:
from io import StringIO
df = pd.read_csv(StringIO(data))

df.to_csv('new_csv.csv', index=True)

In [79]:
from io import StringIO
df = pd.read_csv(StringIO(data))

df.to_excel("new_excel.xlsx", index=True)

   id    name   category               description           comments
0   1   Apple      Fruit             Red and sweet    Good for snacks
1   2  Banana      Fruit           Yellow and soft  Rich in potassium
2   3  Carrot  Vegetable        Orange and crunchy   Great for salads
3   4    Desk  Furniture         Wooden study desk     Needs assembly
4   5   Chair  Furniture  Comfortable office chair          Ergonomic


<br>

**Data Type Filtering**

In [87]:
df.dtypes[df.dtypes=='float64'].describe()

Unnamed: 0,0
count,5
unique,1
top,float64
freq,5


**Slicing**

In [101]:
new_df = df[1:3]
new_df

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms
1,-118.3,34.26,43.0,1510.0,310.0
2,-117.81,33.78,27.0,3589.0,507.0


In [105]:
import numpy as np

new_df['longitude'] = np.random.randint(100, 430)
new_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['longitude'] = np.random.randint(100, 430)


Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms
1,308,34.26,43.0,1510.0,310.0
2,308,33.78,27.0,3589.0,507.0


In [109]:
new_df['lati_longi'] = df['latitude'].astype(str) + " " + df['longitude'].astype(str)
new_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['lati_longi'] = df['latitude'].astype(str) + " " + df['longitude'].astype(str)


Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,lati_longi
1,308,34.26,43.0,1510.0,310.0,34.26 -118.3
2,308,33.78,27.0,3589.0,507.0,33.78 -117.81


In [108]:
print(new_df)

   longitude  latitude  housing_median_age  total_rooms  total_bedrooms  \
1        308     34.26                43.0       1510.0           310.0   
2        308     33.78                27.0       3589.0           507.0   

                                          lati_longi  
1  0    37.37\n1    34.26\n2    33.78\n3    33.82...  
2  0    37.37\n1    34.26\n2    33.78\n3    33.82...  


In [5]:
import seaborn as sns

# List all available datasets
datasets = sns.get_dataset_names()
datasets

['anagrams',
 'anscombe',
 'attention',
 'brain_networks',
 'car_crashes',
 'diamonds',
 'dots',
 'dowjones',
 'exercise',
 'flights',
 'fmri',
 'geyser',
 'glue',
 'healthexp',
 'iris',
 'mpg',
 'penguins',
 'planets',
 'seaice',
 'taxis',
 'tips',
 'titanic']

In [6]:
df = sns.load_dataset('titanic')
df = df.iloc[:10]
df

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True
5,0,3,male,,0,0,8.4583,Q,Third,man,True,,Queenstown,no,True
6,0,1,male,54.0,0,0,51.8625,S,First,man,True,E,Southampton,no,True
7,0,3,male,2.0,3,1,21.075,S,Third,child,False,,Southampton,no,False
8,1,3,female,27.0,0,2,11.1333,S,Third,woman,False,,Southampton,yes,False
9,1,2,female,14.0,1,0,30.0708,C,Second,child,False,,Cherbourg,yes,False


In [7]:
import pandas as pd

pd.Categorical(df['embark_town'])



['Southampton', 'Cherbourg', 'Southampton', 'Southampton', 'Southampton', 'Queenstown', 'Southampton', 'Southampton', 'Southampton', 'Cherbourg']
Categories (3, object): ['Cherbourg', 'Queenstown', 'Southampton']

In [8]:
df[df['age']>30]

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True
6,0,1,male,54.0,0,0,51.8625,S,First,man,True,E,Southampton,no,True


In [9]:
df[df['fare'] == max(df['fare'])]['who']

Unnamed: 0,who
1,woman


In [10]:
df.iloc[2:4, 0:5]  # Row from 2:4 and col from 0,1,2

Unnamed: 0,survived,pclass,sex,age,sibsp
2,1,3,female,26.0,0
3,1,1,female,35.0,1


In [11]:
df.loc[2:4, ['who', 'age']]

Unnamed: 0,who,age
2,woman,26.0
3,woman,35.0
4,man,35.0


In [19]:
df.groupby('embark_town').size()


Unnamed: 0_level_0,0
embark_town,Unnamed: 1_level_1
Cherbourg,2
Queenstown,1
Southampton,7


In [17]:
df['embark_town'].value_counts()


Unnamed: 0_level_0,count
embark_town,Unnamed: 1_level_1
Southampton,7
Cherbourg,2
Queenstown,1


In [23]:
new_slice =  df[['fare','embarked', 'class', 'who','adult_male']][5:9]
new_slice

Unnamed: 0,fare,embarked,class,who,adult_male
5,8.4583,Q,Third,man,True
6,51.8625,S,First,man,True
7,21.075,S,Third,child,False
8,11.1333,S,Third,woman,False


In [24]:
concat_data1 = pd.concat([new_slice, df],  axis=1)
concat_data1

Unnamed: 0,fare,embarked,class,who,adult_male,survived,pclass,sex,age,sibsp,parch,fare.1,embarked.1,class.1,who.1,adult_male.1,deck,embark_town,alive,alone
5,8.4583,Q,Third,man,True,0,3,male,,0,0,8.4583,Q,Third,man,True,,Queenstown,no,True
6,51.8625,S,First,man,True,0,1,male,54.0,0,0,51.8625,S,First,man,True,E,Southampton,no,True
7,21.075,S,Third,child,False,0,3,male,2.0,3,1,21.075,S,Third,child,False,,Southampton,no,False
8,11.1333,S,Third,woman,False,1,3,female,27.0,0,2,11.1333,S,Third,woman,False,,Southampton,yes,False
0,,,,,,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,,,,,,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,,,,,,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,,,,,,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,,,,,,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True
9,,,,,,1,2,female,14.0,1,0,30.0708,C,Second,child,False,,Cherbourg,yes,False


In [25]:
concat_data1 = pd.concat([new_slice, df],  axis=0)
concat_data1

Unnamed: 0,fare,embarked,class,who,adult_male,survived,pclass,sex,age,sibsp,parch,deck,embark_town,alive,alone
5,8.4583,Q,Third,man,True,,,,,,,,,,
6,51.8625,S,First,man,True,,,,,,,,,,
7,21.075,S,Third,child,False,,,,,,,,,,
8,11.1333,S,Third,woman,False,,,,,,,,,,
0,7.25,S,Third,man,True,0.0,3.0,male,22.0,1.0,0.0,,Southampton,no,False
1,71.2833,C,First,woman,False,1.0,1.0,female,38.0,1.0,0.0,C,Cherbourg,yes,False
2,7.925,S,Third,woman,False,1.0,3.0,female,26.0,0.0,0.0,,Southampton,yes,True
3,53.1,S,First,woman,False,1.0,1.0,female,35.0,1.0,0.0,C,Southampton,yes,False
4,8.05,S,Third,man,True,0.0,3.0,male,35.0,0.0,0.0,,Southampton,no,True
5,8.4583,Q,Third,man,True,0.0,3.0,male,,0.0,0.0,,Queenstown,no,True


In [26]:
new_slice

Unnamed: 0,fare,embarked,class,who,adult_male
5,8.4583,Q,Third,man,True
6,51.8625,S,First,man,True
7,21.075,S,Third,child,False
8,11.1333,S,Third,woman,False


In [28]:
new_slice['fare'].apply(lambda x: x*2)

Unnamed: 0,fare
5,16.9166
6,103.725
7,42.15
8,22.2666


In [29]:
new_slice.reset_index(drop=True)

Unnamed: 0,fare,embarked,class,who,adult_male
0,8.4583,Q,Third,man,True
1,51.8625,S,First,man,True
2,21.075,S,Third,child,False
3,11.1333,S,Third,woman,False


In [34]:
for row in new_slice.iterrows():
  print(row)

(5, fare          8.4583
embarked           Q
class          Third
who              man
adult_male      True
Name: 5, dtype: object)
(6, fare          51.8625
embarked            S
class           First
who               man
adult_male       True
Name: 6, dtype: object)
(7, fare          21.075
embarked           S
class          Third
who            child
adult_male     False
Name: 7, dtype: object)
(8, fare          11.1333
embarked            S
class           Third
who             woman
adult_male      False
Name: 8, dtype: object)


In [35]:
new_slice

Unnamed: 0,fare,embarked,class,who,adult_male
5,8.4583,Q,Third,man,True
6,51.8625,S,First,man,True
7,21.075,S,Third,child,False
8,11.1333,S,Third,woman,False


In [36]:
def subtract(x):
  return abs(x-10)

new_slice['fare'].apply(subtract) vs new_slice['fare'].apply(lambda x: x-10)

Unnamed: 0,fare
5,1.5417
6,41.8625
7,11.075
8,1.1333


In [37]:

new_slice['fare'].apply(lambda x: x-10)

Unnamed: 0,fare
5,-1.5417
6,41.8625
7,11.075
8,1.1333


In [39]:
new_slice['fare'].sort_values(ascending=False)

Unnamed: 0,fare
6,51.8625
7,21.075
8,11.1333
5,8.4583


In [40]:
new_slice['fare'].sort_index(ascending=True)

Unnamed: 0,fare
5,8.4583
6,51.8625
7,21.075
8,11.1333


In [42]:
new_slice['fare'].rolling(window=2).mean()

Unnamed: 0,fare
5,
6,30.1604
7,36.46875
8,16.10415


In [46]:

import pandas as pd

dates = pd.date_range(start='1998-03-02', end='2005-06-05', periods=10)
pd.DataFrame(dates)

Unnamed: 0,0
0,1998-03-02 00:00:00
1,1998-12-21 16:00:00
2,1999-10-12 08:00:00
3,2000-08-02 00:00:00
4,2001-05-23 16:00:00
5,2002-03-14 08:00:00
6,2003-01-03 00:00:00
7,2003-10-24 16:00:00
8,2004-08-14 08:00:00
9,2005-06-05 00:00:00


In [49]:
pd.Timedelta(days=5)

start = pd.Timestamp('2025-10-31')
end = start + pd.Timedelta(days=10)

print(end)


2025-11-10 00:00:00
