# Creating Data and DataFrame

In [2]:
from xmlrpc.client import boolean
import numpy as np
import pandas as pd

In [3]:
data = np.random.randn(4,3)
print(data)
dataFrame = pd.DataFrame(data)
print(dataFrame)
print(dataFrame[0])

[[ 0.44151469  0.34232569  1.30848263]
 [ 0.26012858 -0.86875526  0.92775554]
 [-1.29353105 -0.70682434 -1.32980746]
 [-0.17670637 -1.03502425 -0.54869409]]
          0         1         2
0  0.441515  0.342326  1.308483
1  0.260129 -0.868755  0.927756
2 -1.293531 -0.706824 -1.329807
3 -0.176706 -1.035024 -0.548694
0    0.441515
1    0.260129
2   -1.293531
3   -0.176706
Name: 0, dtype: float64


# Creating a New DataFrame with Index and Columns

In [4]:
newDataFrame = pd.DataFrame(data, index=["Atil", "Zeynep", "Atlas", "Mehmet"], columns=["Salary", "Age", "Working Hours"])
print(newDataFrame)
print(newDataFrame["Age"]) # Select a specific column
print(newDataFrame.loc["Atil", "Age"]) # Select a specific cell
print(newDataFrame.loc["Atlas"]) # Select a specific row
print(newDataFrame.iloc[3]) # Select a specific row by position index
print(newDataFrame.iloc[:, 2]) # Select a specific column by position index

          Salary       Age  Working Hours
Atil    0.441515  0.342326       1.308483
Zeynep  0.260129 -0.868755       0.927756
Atlas  -1.293531 -0.706824      -1.329807
Mehmet -0.176706 -1.035024      -0.548694
Atil      0.342326
Zeynep   -0.868755
Atlas    -0.706824
Mehmet   -1.035024
Name: Age, dtype: float64
0.34232569418539155
Salary          -1.293531
Age             -0.706824
Working Hours   -1.329807
Name: Atlas, dtype: float64
Salary          -0.176706
Age             -1.035024
Working Hours   -0.548694
Name: Mehmet, dtype: float64
Atil      1.308483
Zeynep    0.927756
Atlas    -1.329807
Mehmet   -0.548694
Name: Working Hours, dtype: float64


# Adding New Data (Index)

In [5]:
newDataFrame["Retirement Age"] = newDataFrame["Age"] + newDataFrame["Age"]
print(newDataFrame)

          Salary       Age  Working Hours  Retirement Age
Atil    0.441515  0.342326       1.308483        0.684651
Zeynep  0.260129 -0.868755       0.927756       -1.737511
Atlas  -1.293531 -0.706824      -1.329807       -1.413649
Mehmet -0.176706 -1.035024      -0.548694       -2.070048


# Deleting Data (Index)

In [6]:
newDataFrame.drop("Mehmet", axis=0, inplace=True) # Here; axis=0 is row, axis=1 is column
newDataFrame.drop("Retirement Age", axis=1, inplace=True)
print(newDataFrame)

          Salary       Age  Working Hours
Atil    0.441515  0.342326       1.308483
Zeynep  0.260129 -0.868755       0.927756
Atlas  -1.293531 -0.706824      -1.329807


# Filtering Data (Index)

In [8]:
print(newDataFrame[newDataFrame < 0])
print(newDataFrame[newDataFrame["Working Hours"] > 0])

          Salary       Age  Working Hours
Atil         NaN       NaN            NaN
Zeynep       NaN -0.868755            NaN
Atlas  -1.293531 -0.706824      -1.329807
          Salary       Age  Working Hours
Atil    0.441515  0.342326       1.308483
Zeynep  0.260129 -0.868755       0.927756


# Labeling Data (Index)

In [9]:
print(newDataFrame.reset_index())
newIndexList = ["Ati", "Zey", "Atl"]
newDataFrame["New Index"] = newIndexList
print(newDataFrame) # Here we just defined the new index.
newDataFrame.set_index("New Index", inplace=True) # With inplace=True we transferred the new index to names.
print(newDataFrame) 
print(newDataFrame.loc["Ati"])

    index    Salary       Age  Working Hours
0    Atil  0.441515  0.342326       1.308483
1  Zeynep  0.260129 -0.868755       0.927756
2   Atlas -1.293531 -0.706824      -1.329807
          Salary       Age  Working Hours New Index
Atil    0.441515  0.342326       1.308483       Ati
Zeynep  0.260129 -0.868755       0.927756       Zey
Atlas  -1.293531 -0.706824      -1.329807       Atl
             Salary       Age  Working Hours
New Index                                   
Ati        0.441515  0.342326       1.308483
Zey        0.260129 -0.868755       0.927756
Atl       -1.293531 -0.706824      -1.329807
Salary           0.441515
Age              0.342326
Working Hours    1.308483
Name: Ati, dtype: float64


# Categorizing Data (Index)

In [10]:
outerIndexes = ["Simpson", "Simpson", "Simpson", "South Park", "South Park", "South Park"]
innerIndexes = ["Homer", "Bart", "Marge", "Cartman", "Kenny", "Kyle"]
mergedIndexes = list(zip(outerIndexes, innerIndexes))
print(mergedIndexes)
mergedIndexes = pd.MultiIndex.from_tuples(sorted(mergedIndexes))
print(mergedIndexes)

[('Simpson', 'Homer'), ('Simpson', 'Bart'), ('Simpson', 'Marge'), ('South Park', 'Cartman'), ('South Park', 'Kenny'), ('South Park', 'Kyle')]
MultiIndex([(   'Simpson',    'Bart'),
            (   'Simpson',   'Homer'),
            (   'Simpson',   'Marge'),
            ('South Park', 'Cartman'),
            ('South Park',   'Kenny'),
            ('South Park',    'Kyle')],
           )


In [11]:
myCartoonList = [[40, "A"], [10, "B"], [30, "C"], [9, "D"], [10, "E"], [11, "F"]] # Table data
cartoonNumpyArray = np.array(myCartoonList) # Create array
cartoonDataFrame = pd.DataFrame(cartoonNumpyArray, index=mergedIndexes, columns=["Age", "Profession"]) # Create categorical table
print(cartoonDataFrame)
cartoonDataFrame.index.names = ["Cartoon Name", "Name"]
print(cartoonDataFrame)

                   Age Profession
Simpson    Bart     40          A
           Homer    10          B
           Marge    30          C
South Park Cartman   9          D
           Kenny    10          E
           Kyle     11          F
                     Age Profession
Cartoon Name Name                  
Simpson      Bart     40          A
             Homer    10          B
             Marge    30          C
South Park   Cartman   9          D
             Kenny    10          E
             Kyle     11          F
