## Creating a Data Frame

In [31]:
import numpy as np
import pandas as pd

students = {
    "names": ["Alice", "Bob", "Charlie", "Diana", "Ethan", "Fiona", "George", "Hannah", "Ian", "Julia"],
    "ages": [23, 25, 22, 24,  21, 23, 26, 22, 24, 25],
    "grades": [88, 92, 85, 90, 78, 84, 95, 87, 80, 89],
    "passed": [True, True, True, True, False, True, True, True, False, True]
}

df = pd.DataFrame(students)

df

Unnamed: 0,names,ages,grades,passed
0,Alice,23,88,True
1,Bob,25,92,True
2,Charlie,22,85,True
3,Diana,24,90,True
4,Ethan,21,78,False
5,Fiona,23,84,True
6,George,26,95,True
7,Hannah,22,87,True
8,Ian,24,80,False
9,Julia,25,89,True


## Reading and Writing Files

In [32]:
df.to_csv('Result.csv')

In [33]:
df.to_json('Result.json')

In [34]:
csv_df = pd.read_csv('Result.csv')
json_df = pd.read_json('Result.json')

## Exploring Data

In [35]:
Result_2 = {
    "names": ["Alice", "Bob", "Charlie", "Diana", "Ethan", "Fiona", "George", "Hannah", "Ian", "Julia"],
    "ages": [23, 25, 22, 24, 21, 23, 26, 22, 24, 25],
    "grades": [88, 92, 45, 90, 78, 49, 95, 87, 80, 33],
}

Result_df = pd.DataFrame(Result_2)
Result_df

Result_df.head(2)

Unnamed: 0,names,ages,grades
0,Alice,23,88
1,Bob,25,92


In [36]:
Result_df.describe()

Unnamed: 0,ages,grades
count,10.0,10.0
mean,23.5,73.7
std,1.581139,22.568661
min,21.0,33.0
25%,22.25,56.25
50%,23.5,83.5
75%,24.75,89.5
max,26.0,95.0


In [37]:
print('Data Frame Type:', type(Result_df))
print('Column Types:\n', Result_df.dtypes )

Data Frame Type: <class 'pandas.core.frame.DataFrame'>
Column Types:
 names     object
ages       int64
grades     int64
dtype: object


## Series Basics

In [38]:
ser = pd.Series(np.random.rand(10))
ser
print("Data Type:", )

Data Type:


## Creating a Larger Data Frame

In [39]:
import numpy as np
import pandas as pd

newdf = pd.DataFrame(np.random.rand(334,5), index=np.arange(334))
newdf

Unnamed: 0,0,1,2,3,4
0,0.155177,0.248256,0.960143,0.650582,0.912226
1,0.443671,0.417518,0.486339,0.942320,0.617359
2,0.983455,0.429240,0.677273,0.731182,0.148549
3,0.767068,0.250812,0.575407,0.589522,0.594022
4,0.176720,0.538974,0.406760,0.127398,0.679189
...,...,...,...,...,...
329,0.244239,0.883040,0.768393,0.503273,0.957822
330,0.497567,0.717879,0.497170,0.993687,0.328282
331,0.940271,0.598429,0.790321,0.392406,0.035391
332,0.541687,0.194869,0.975224,0.651653,0.835314


In [40]:
type(newdf)

pandas.core.frame.DataFrame

In [41]:
newdf.describe()

Unnamed: 0,0,1,2,3,4
count,334.0,334.0,334.0,334.0,334.0
mean,0.488412,0.488397,0.513535,0.5104,0.504852
std,0.292231,0.287018,0.288514,0.281979,0.295526
min,0.000158,0.003585,0.003061,0.001796,0.001363
25%,0.225654,0.248895,0.259153,0.258031,0.238582
50%,0.481501,0.475441,0.51904,0.530672,0.513995
75%,0.747345,0.743041,0.754022,0.718066,0.753982
max,0.997819,0.993279,0.998494,0.999496,0.993407


In [42]:
newdf.dtypes

0    float64
1    float64
2    float64
3    float64
4    float64
dtype: object

In [43]:
newdf.T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,324,325,326,327,328,329,330,331,332,333
0,0.155177,0.443671,0.983455,0.767068,0.17672,0.293107,0.258941,0.352394,0.677251,0.506771,...,0.525608,0.435989,0.454482,0.57578,0.138442,0.244239,0.497567,0.940271,0.541687,0.804805
1,0.248256,0.417518,0.42924,0.250812,0.538974,0.041039,0.815047,0.497775,0.473863,0.235308,...,0.649568,0.970636,0.28463,0.058394,0.290633,0.88304,0.717879,0.598429,0.194869,0.687608
2,0.960143,0.486339,0.677273,0.575407,0.40676,0.058339,0.779505,0.645527,0.916793,0.639648,...,0.494328,0.028318,0.428139,0.998494,0.068926,0.768393,0.49717,0.790321,0.975224,0.071272
3,0.650582,0.94232,0.731182,0.589522,0.127398,0.360914,0.61206,0.469432,0.620949,0.215233,...,0.633343,0.487319,0.44053,0.525901,0.719628,0.503273,0.993687,0.392406,0.651653,0.461509
4,0.912226,0.617359,0.148549,0.594022,0.679189,0.898711,0.984019,0.333915,0.498431,0.052311,...,0.7004,0.707181,0.635236,0.026836,0.308303,0.957822,0.328282,0.035391,0.835314,0.719666


In [44]:
# access and change value of a data frame
newdf.at[3,4] = 98
newdf

Unnamed: 0,0,1,2,3,4
0,0.155177,0.248256,0.960143,0.650582,0.912226
1,0.443671,0.417518,0.486339,0.942320,0.617359
2,0.983455,0.429240,0.677273,0.731182,0.148549
3,0.767068,0.250812,0.575407,0.589522,98.000000
4,0.176720,0.538974,0.406760,0.127398,0.679189
...,...,...,...,...,...
329,0.244239,0.883040,0.768393,0.503273,0.957822
330,0.497567,0.717879,0.497170,0.993687,0.328282
331,0.940271,0.598429,0.790321,0.392406,0.035391
332,0.541687,0.194869,0.975224,0.651653,0.835314


In [45]:
newdf.head(4)

Unnamed: 0,0,1,2,3,4
0,0.155177,0.248256,0.960143,0.650582,0.912226
1,0.443671,0.417518,0.486339,0.94232,0.617359
2,0.983455,0.42924,0.677273,0.731182,0.148549
3,0.767068,0.250812,0.575407,0.589522,98.0


## Conversion to NumPy Array

In [46]:
newdf.to_numpy()

array([[0.1551767 , 0.24825624, 0.9601425 , 0.65058231, 0.91222585],
       [0.44367082, 0.41751827, 0.48633923, 0.94231983, 0.61735949],
       [0.98345529, 0.4292401 , 0.67727326, 0.73118238, 0.14854885],
       ...,
       [0.94027078, 0.59842914, 0.79032141, 0.39240577, 0.03539094],
       [0.54168722, 0.1948686 , 0.97522363, 0.65165265, 0.83531438],
       [0.80480517, 0.68760843, 0.07127224, 0.46150949, 0.71966598]],
      shape=(334, 5))

In [47]:
newdf[2]

0      0.960143
1      0.486339
2      0.677273
3      0.575407
4      0.406760
         ...   
329    0.768393
330    0.497170
331    0.790321
332    0.975224
333    0.071272
Name: 2, Length: 334, dtype: float64

In [48]:
data = {
    'name': ['Alice', 'Bob', 'Charlie'],
    'age': [25, 30, 35]
}

df2 = pd.DataFrame(data, index = ['a','b','c'])
df2

Unnamed: 0,name,age
a,Alice,25
b,Bob,30
c,Charlie,35


In [49]:
print(df2.iloc[0])
print(df2.loc['c','name'])

name    Alice
age        25
Name: a, dtype: object
Charlie
