In [2]:
!pip install pandas --upgrade

Collecting pandas
  Downloading pandas-2.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.3 MB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.3/12.3 MB[0m [31m26.9 MB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m0:01[0m01[0m
Installing collected packages: pandas
  Attempting uninstall: pandas
    Found existing installation: pandas 2.2.2
    Uninstalling pandas-2.2.2:
      Successfully uninstalled pandas-2.2.2
Successfully installed pandas-2.3.1


In [10]:
import pandas as pd
import numpy as np

In [6]:
# Version of pandas that is imported in this kernel.
pd.__version__

'2.3.1'

## Pandas Data Structures

1. **DataFrame**: a 2D labeled table with rows and columns
2. **Series**: a 1D labeled, array-like object that can hold any data type, such as integers, strings, floating-point numbers, and Python objects

## Creating a DataFrame


In [7]:
# Column-oriented input: each key is a column label, each list holds that
# column's values (one entry per row).
data = dict(
    name=["Bob", "Charlie", "Aman"],
    age=[25, 30, 35],
)
data

{'name': ['Bob', 'Charlie', 'Aman'], 'age': [25, 30, 35]}

In [8]:
# Build a DataFrame from the dict: the keys become the column labels and the
# rows get a default integer index starting at 0.
df = pd.DataFrame(data)

In [9]:
# A bare expression on the last line of a cell is rendered as the cell output.
df

Unnamed: 0,name,age
0,Bob,25
1,Charlie,30
2,Aman,35


In [11]:
# dtype=object keeps every cell's original Python type. Without it NumPy picks
# a single common dtype for the whole array and silently converts the integer
# ages into strings.
arr_data = np.array(
    [
        ["bob", 25],
        ["Charlie", 30],
        ["Aman", 35],
    ],
    dtype=object,
)

In [14]:
# A 2D array carries no column labels, so they are supplied through `columns`.
pd.DataFrame(arr_data, columns=["Name","age"])

Unnamed: 0,Name,age
0,bob,25
1,Charlie,30
2,Aman,35


## Reading Data From Files

- `pd.read_csv(file_path, delimiter=',')`: Reads a CSV (Comma-Separated Values) file and returns a pandas DataFrame
    - *file_path*: Path to the CSV file
    - *delimiter*: Character that separates the columns (default=',')

In [19]:
# delimiter="," splits the columns on commas; header=0 takes the column names
# from the first row of the file.
titanic_df = pd.read_csv("titanic-data.csv", delimiter=",", header=0)

In [21]:
# shape is a (number of rows, number of columns) tuple.
titanic_df.shape

(891, 12)

In [22]:
# With no argument, head() shows the first 5 rows.
titanic_df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


- `pd.read_json(file_path)`: Reads a JSON file and returns a pandas DataFrame
    - *file_path*: Path to the JSON file
    - *orient*: Expected layout of the JSON data, e.g. `'records'` or `'columns'` (optional; unlike `read_csv`, there is no `delimiter` argument)

In [27]:
# Parse the JSON file into a pandas object (a DataFrame by default).
json_data = pd.read_json("data.json")

In [28]:
# Check which type read_json returned.
type(json_data)

pandas.core.frame.DataFrame

## Accessing Data

In [37]:
# Bracket indexing with a single column label returns that column as a Series.
# Attribute access (titanic_df.Name) is an alternative way to select it.
name = titanic_df["Name"]
name

0                                Braund, Mr. Owen Harris
1      Cumings, Mrs. John Bradley (Florence Briggs Th...
2                                 Heikkinen, Miss. Laina
3           Futrelle, Mrs. Jacques Heath (Lily May Peel)
4                               Allen, Mr. William Henry
                             ...                        
886                                Montvila, Rev. Juozas
887                         Graham, Miss. Margaret Edith
888             Johnston, Miss. Catherine Helen "Carrie"
889                                Behr, Mr. Karl Howell
890                                  Dooley, Mr. Patrick
Name: Name, Length: 891, dtype: object

In [38]:
# Indexing with a list of labels returns a DataFrame containing just those
# columns, in the order listed.
name_age = titanic_df[["Name","Age"]]
name_age

Unnamed: 0,Name,Age
0,"Braund, Mr. Owen Harris",22.0
1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",38.0
2,"Heikkinen, Miss. Laina",26.0
3,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",35.0
4,"Allen, Mr. William Henry",35.0
...,...,...
886,"Montvila, Rev. Juozas",27.0
887,"Graham, Miss. Margaret Edith",19.0
888,"Johnston, Miss. Catherine Helen ""Carrie""",
889,"Behr, Mr. Karl Howell",26.0


In [47]:
# .loc slices by label and, unlike positional slicing, includes the end label:
# `:2` returns the rows labelled 0, 1 and 2 of the "Age" column.
name_age.loc[:2,"Age"]

0    22.0
1    38.0
2    26.0
Name: Age, dtype: float64

---

In [48]:
# Column labels of the DataFrame, returned as an Index.
titanic_df.columns

Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp',
       'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],
      dtype='object')

In [49]:
# (number of rows, number of columns)
titanic_df.shape

(891, 12)

In [50]:
# Data type of each column; text columns are reported as `object`.
titanic_df.dtypes

PassengerId      int64
Survived         int64
Pclass           int64
Name            object
Sex             object
Age            float64
SibSp            int64
Parch            int64
Ticket          object
Fare           float64
Cabin           object
Embarked        object
dtype: object

In [53]:
# First 3 rows.
titanic_df.head(3)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S


In [55]:
# Last 4 rows.
titanic_df.tail(4)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.45,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0,C148,C
890,891,0,3,"Dooley, Mr. Patrick",male,32.0,0,0,370376,7.75,,Q
