In [4]:
import os

import numpy as np
import pandas as pd
from numpy.random import randn

## DataFrame Creation

### DataFrame() function

`pd.DataFrame(data=None, index=None, columns=None, dtype=None, copy=None)`

In [26]:
# Build a 5x4 frame of standard-normal samples; row and column labels
# default to 0..n-1 because none are supplied.
df = pd.DataFrame(data=randn(5, 4))

In [28]:
# Same 5x4 random frame, this time with explicit row (index) and column labels.
row_labels = list("ABCDE")
column_labels = list("WXYZ")
df = pd.DataFrame(data=randn(5, 4), index=row_labels, columns=column_labels)
df

Unnamed: 0,W,X,Y,Z
A,-1.22176,-0.524426,-0.638774,0.264339
B,0.68681,0.265185,-0.234714,-0.998813
C,1.505252,-0.64682,-0.784118,-0.067121
D,-0.643758,-0.45424,-0.869,0.873063
E,0.569303,0.017401,0.170616,-0.558829


### from an existing DataFrame, using a condition

In [31]:
# Keep only the rows whose W value is strictly positive.
w_is_positive = df["W"] > 0
newdf = df[w_is_positive]
newdf

Unnamed: 0,W,X,Y,Z
B,0.68681,0.265185,-0.234714,-0.998813
C,1.505252,-0.64682,-0.784118,-0.067121
E,0.569303,0.017401,0.170616,-0.558829


### from list

In [36]:
# A plain Python list becomes a single-column frame named "nums".
datam = [1, 2, 39, 67, 90]
df = pd.DataFrame({"nums": datam})
df

Unnamed: 0,nums
0,1
1,2
2,39
3,67
4,90


### from array

In [40]:
# The integers 1..9 laid out row by row in a 3x3 array.
m = np.arange(1, 10).reshape((3, 3))
m

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [43]:
# Wrap the 3x3 array in a frame; each array column gets a "varN" label.
df = pd.DataFrame(data=m, columns=[f"var{k}" for k in range(1, 4)])
df

Unnamed: 0,var1,var2,var3
0,1,2,3
1,4,5,6
2,7,8,9


In [46]:
# The column labels of the frame, returned as a pandas Index.
df.columns

Index(['var1', 'var2', 'var3'], dtype='object')

In [47]:
# Iterating over the column Index yields one label at a time.
for column_name in df.columns:
    print(column_name)

var1
var2
var3


In [48]:
# Assigning a list to .columns relabels every column at once.
# This mutates df in place, so cells that run afterwards see the new names.
df.columns = ["new1", "new2", "new3"]
df

Unnamed: 0,new1,new2,new3
0,1,2,3
1,4,5,6
2,7,8,9


In [44]:
# Show the first two rows.
# NOTE(review): the recorded output still shows var1..var3 because this cell
# was executed (In [44]) before the rename cell (In [48]); re-run in order.
df.head(2)

Unnamed: 0,var1,var2,var3
0,1,2,3
1,4,5,6


In [45]:
# Show the last two rows.
# NOTE(review): recorded output predates the column rename (In [45] ran
# before In [48]); re-run in order to refresh it.
df.tail(2)

Unnamed: 0,var1,var2,var3
1,4,5,6
2,7,8,9


In [49]:
# Confirm the object is a pandas DataFrame.
type(df)

pandas.core.frame.DataFrame

In [50]:
# Data type of each column, returned as a Series indexed by column label.
df.dtypes

new1    int32
new2    int32
new3    int32
dtype: object

In [51]:
# (number of rows, number of columns)
df.shape

(3, 3)

In [52]:
# Number of axes; a DataFrame has two (rows and columns).
df.ndim

2

In [53]:
# Total number of cells (rows x columns).
df.size

9

In [54]:
# The underlying data as a NumPy array. pandas recommends to_numpy()
# over the older .values attribute.
df.to_numpy()

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [55]:
# The extracted data is a plain numpy.ndarray. to_numpy() is the
# accessor pandas recommends in place of .values.
type(df.to_numpy())

numpy.ndarray

### from dictionary

In [56]:
# Five random integers drawn from 0..9 (the upper bound 10 is exclusive).
# No seed is set, so the values change on every run.
np.random.randint(10,size=5)

array([6, 0, 1, 2, 8])

In [57]:
# Three independent arrays of five random integers in 0..9, drawn in order
# s1, s2, s3. They become the columns of the dictionary-based frame.
s1, s2, s3 = (np.random.randint(10, size=5) for _ in range(3))
s1, s2, s3

(array([6, 4, 6, 9, 4]), array([0, 2, 7, 4, 1]), array([2, 3, 4, 0, 9]))

In [58]:
# Map each column label to the array holding that column's values.
mydict = {"var1":s1, "var2":s2, "var3": s3}

In [60]:
# Dictionary keys become column labels; each array supplies that column's rows.
df1 = pd.DataFrame(data=mydict)
df1

Unnamed: 0,var1,var2,var3
0,6,0,2
1,4,2,3
2,6,7,4
3,9,4,0
4,4,1,9


## Conditional selection

In [3]:
# Small mixed-type frame used for the conditional-selection examples.
column_data = {
    "col1": [1, 2, 3, 4],
    "col2": [444, 555, 666, 444],
    "col3": ["abc", "def", "ghi", "xyz"],
}
df = pd.DataFrame(column_data)
df

Unnamed: 0,col1,col2,col3
0,1,444,abc
1,2,555,def
2,3,666,ghi
3,4,444,xyz


In [4]:
# Selecting a single column by label returns a Series.
df["col1"]

0    1
1    2
2    3
3    4
Name: col1, dtype: int64

In [5]:
# Comparing a column with a scalar returns a boolean Series, one flag per row.
# Attribute access gives the same result: df.col1 > 2
df["col1"] > 2

0    False
1    False
2     True
3     True
Name: col1, dtype: bool

In [7]:
# Index the frame with the boolean mask to keep only rows where col1 exceeds 2.
col1_above_2 = df["col1"] > 2
df[col1_above_2]

Unnamed: 0,col1,col2,col3
2,3,666,ghi
3,4,444,xyz


In [8]:
# Select rows and column in a single .loc call rather than chaining two
# [] lookups: the chained form builds an intermediate frame and is the
# pattern that triggers SettingWithCopyWarning when used for assignment.
df.loc[df["col1"] > 2, "col1"]

2    3
3    4
Name: col1, dtype: int64

In [9]:
# Combining conditions: join boolean masks with & (and) or | (or),
# wrapping each comparison in its own parentheses.

In [13]:
# Build each condition as its own mask, then AND them element-wise.
col1_above_2 = df["col1"] > 2
col2_is_666 = df["col2"] == 666
df[col1_above_2 & col2_is_666]

Unnamed: 0,col1,col2,col3
2,3,666,ghi


In [1]:
# Example: look up one column's value for the rows matching a condition.

In [18]:
# Toy data set with missing values (NaN) in both columns.
df = pd.DataFrame({
    "cigarette": (0, 5, 10, 15, np.nan, 20),
    "lung_cap": (45, 42, np.nan, 33, 31, 29),
})
# Lung capacity for the rows where exactly 5 cigarettes were recorded.
# A single .loc call replaces the chained df[mask]["lung_cap"] lookup.
# The bare `df` that used to sit mid-cell was removed because only the
# last expression of a cell is displayed.
df.loc[df["cigarette"] == 5, "lung_cap"]

1    42.0
Name: lung_cap, dtype: float64

**Conditionals (if / else) in pandas: creating columns based on conditions**

In [2]:
# Parallel tuples: the i-th name owes the i-th debt.
names = ("Martin", "Lisa", "Maxwell", "Manju", "Kenneth")
debts = (10, 20, 30, 20, 30)
df = pd.DataFrame(dict(names=names, debts=debts))
df

Unnamed: 0,names,debts
0,Martin,10
1,Lisa,20
2,Maxwell,30
3,Manju,20
4,Kenneth,30


In [3]:
# Vectorised if/else: debts below 20 accrue 20% interest, all others 40%.
# np.where evaluates the condition for the whole column at once instead of
# looping over the rows in Python.
df["interests"] = np.where(df["debts"] < 20, df["debts"] * 0.2, df["debts"] * 0.4)

In [4]:
# Display the frame, now including the computed "interests" column.
df

Unnamed: 0,names,debts,interests
0,Martin,10,2.0
1,Lisa,20,8.0
2,Maxwell,30,12.0
3,Manju,20,8.0
4,Kenneth,30,12.0


In [9]:
# Location of the Titanic CSV. Set the TITANIC_CSV environment variable to
# point at your own copy; the fallback is the original author's local path,
# which only exists on that machine.
titanic_csv_path = os.environ.get("TITANIC_CSV", "C:/Users/Owner/titanic.csv")
titanic = pd.read_csv(titanic_csv_path)
titanic.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [11]:
# A single column selected by label is a pandas Series.
type(titanic["Age"])

pandas.core.series.Series

In [12]:
# A Series is one-dimensional, so its shape has a single entry (the row count).
titanic["Age"].shape

(891,)

In [13]:
# Passing a list of labels selects several columns and returns a DataFrame.
titanic[["Age","Sex"]].head()

Unnamed: 0,Age,Sex
0,22.0,male
1,38.0,female
2,26.0,female
3,35.0,female
4,35.0,male


In [14]:
# Boolean mask: True for passengers older than 35. Rows with a missing Age
# compare as False.
above35 = titanic["Age"].gt(35)
above35.head()

0    False
1     True
2    False
3    False
4    False
Name: Age, dtype: bool

In [17]:
# Keep only passengers older than 35 and preview the first five of them.
older_than_35 = titanic["Age"] > 35
titanic.loc[older_than_35].head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
6,7,0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S
11,12,1,1,"Bonnell, Miss. Elizabeth",female,58.0,0,0,113783,26.55,C103,S
13,14,0,3,"Andersson, Mr. Anders Johan",male,39.0,1,5,347082,31.275,,S
15,16,1,2,"Hewlett, Mrs. (Mary D Kingcome)",female,55.0,0,0,248706,16.0,,S


In [16]:
# (rows, columns) of the full Titanic data set.
titanic.shape

(891, 12)