# Creating or reading a DataFrame/Series

In [1]:
import pandas as pd

In [2]:
# Build a DataFrame from a dict mapping column names to their values.
# `index` supplies the row labels; it is optional and defaults to 0, 1, ...
synth_df = pd.DataFrame(
    data={'Yes': [1, 5], 'No': [3, 8]},
    index=["First", "Second"],
)
synth_df

Unnamed: 0,Yes,No
First,1,3
Second,5,8


In [3]:
# Read a CSV file into a DataFrame (the path is relative, not absolute)
df = pd.read_csv(filepath_or_buffer='data/titanic.csv')
# Preview only the first five rows rather than displaying the whole table
df.head(n=5)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [4]:
# Build a Series from a plain list; with no index given, the labels are 0, 1, 2
synth_ser = pd.Series(data=[49, 28, 10])
synth_ser

0    49
1    28
2    10
dtype: int64

### Adding data to an existing DataFrame

In [5]:
# Add a column to the DataFrame: assign a list with one value per existing row.
# The list is sized from the frame so this cell can be re-run safely — a fixed
# two-element list would raise a length-mismatch ValueError once the 'Third'
# row added below exists.
synth_df['Maybe'] = [10] * len(synth_df)

# Add a row to a DataFrame: assigning to a label that does not exist yet via
# .loc appends it (one value per column, in column order: Yes, No, Maybe)
synth_df.loc['Third'] = [8, 3, 10]

synth_df

Unnamed: 0,Yes,No,Maybe
First,1,3,10
Second,5,8,10
Third,8,3,10


In [6]:
# Append a value to a Series by assigning to a label that is not in the index yet.
# The new value is a string, so the Series stops being int64 and its dtype
# becomes object.
synth_ser.loc[3] = "Hello, this is the new value"
synth_ser

0                              49
1                              28
2                              10
3    Hello, this is the new value
dtype: object

In [7]:
# Give a label to the index column.
# The name is set on synth_df's existing index object, so no reassignment is
# needed; it is displayed as a header above the row labels.
synth_df.index.name = "Position"
synth_df

Unnamed: 0_level_0,Yes,No,Maybe
Position,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
First,1,3,10
Second,5,8,10
Third,8,3,10


In [8]:
# Promote an existing column to be the index.
# set_index returns a new DataFrame and leaves `df` itself unchanged; assign the
# result (or pass inplace=True) to keep it.
df.set_index(keys='PassengerId')

Unnamed: 0_level_0,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...
887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


Notice how the label "PassengerId" sits one row lower than the other column headers: this is pandas' way of hinting that it is no longer a regular column, but rather the name of the index.