In [22]:
import pandas as pd
import numpy as np

## creating one dimensional pandas series

In [4]:
series = pd.Series([1,4,np.NaN,7])
series

0    1.0
1    4.0
2    NaN
3    7.0
dtype: float64

## Creating Pandas dataframe using dict

In [7]:
data =  [{'a': 1, 'b': 2, 'c': 3, 'd': 4},
...     {'a': 100, 'b': 200, 'c': 300, 'd': 400},
...     {'a': 1000, 'b': 2000, 'c': 3000, 'd': 4000 }]

s = pd.Series(data)
df = pd.DataFrame(data)
print("Series") 
print(s)
print()
print("Dataframe")
print(df)

Series
0                {'a': 1, 'b': 2, 'c': 3, 'd': 4}
1        {'a': 100, 'b': 200, 'c': 300, 'd': 400}
2    {'a': 1000, 'b': 2000, 'c': 3000, 'd': 4000}
dtype: object

Dataframe
      a     b     c     d
0     1     2     3     4
1   100   200   300   400
2  1000  2000  3000  4000


## Series Operations

In [13]:
print(series,"\n")
print("mean: ",series.mean(),"\n")
print("mode: ",series.mode(),"\n")
print("value_count: ",series.value_counts(),"\n")
print("count: ",series.count(),"\n")
print("std: ",series.std(),"\n")
print("min: ",series.min(),"\n")
print("max: ",series.max(),"\n")
print("iloc: ",series.iloc[0],"\n") # used to access specific element or element in a range
print(series.describe())



0    1.0
1    4.0
2    NaN
3    7.0
dtype: float64 

mean:  4.0 

mode:  0    1.0
1    4.0
2    7.0
dtype: float64 

value_count:  1.0    1
4.0    1
7.0    1
Name: count, dtype: int64 

count:  3 

std:  3.0 

min:  1.0 

max:  7.0 

iloc:  1.0 

count    3.0
mean     4.0
std      3.0
min      1.0
25%      2.5
50%      4.0
75%      5.5
max      7.0
dtype: float64


In [15]:
# create a pandas data frame with only zeroes
shape = (3,3)
zeroes = np.zeros(shape, int)
print(zeroes)

[[0 0 0]
 [0 0 0]
 [0 0 0]]


In [9]:
# create a pandas data frame with only ones
ones = np.ones(shape, float)
print(ones)

[[1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]]


In [10]:
# create a pandas data frame with only random values
nparray = np.random.randint(0,5,(6,6))
print(nparray)

[[4 0 0 2 3 1]
 [3 0 3 0 0 4]
 [2 2 4 3 1 2]
 [0 1 3 2 0 1]
 [2 0 0 3 2 1]
 [0 2 3 1 3 1]]


## Reading from a CSV

In [16]:
dataset = pd.read_csv("../Datasets/AAPL.csv")
print(dataset, '\n')
print(dataset[['Date', 'High']])
print(dataset['Close'])
print(dataset[(dataset['Low']>=130) &(dataset['High'] == 133)])

         Date  Open  High  Low  Close  Adj Close    Volume
0  2023-01-09   130   133  129    130        129  70790800
1  2023-01-10   130   131  128    130        130  63896200
2  2023-01-11   131   133  130    133        132  69458900
3  2023-01-12   133   134  131    133        132  71379600 

         Date  High
0  2023-01-09   133
1  2023-01-10   131
2  2023-01-11   133
3  2023-01-12   134
0    130
1    130
2    133
3    133
Name: Close, dtype: int64
         Date  Open  High  Low  Close  Adj Close    Volume
2  2023-01-11   131   133  130    133        132  69458900


In [17]:
student_data = [[1,15],[2,11],[3,11],[4,20]]

In [18]:
df1 = pd.DataFrame(data=student_data, columns=['student_id','age'], index=['oje',4,5,6], dtype=int)
df1

Unnamed: 0,student_id,age
oje,1,15
4,2,11
5,3,11
6,4,20


In [19]:
df1.head(3)

Unnamed: 0,student_id,age
oje,1,15
4,2,11
5,3,11


In [31]:
my_dict = {'student_id':[101,52,128,3], 'name':['Ulysses','William',None,'Henry'], 'age':[13,10,6,13]}

df = pd.DataFrame(my_dict)

result = df[['name','age']][df['student_id']==101]
result
df

Unnamed: 0,student_id,name,age
0,101,Ulysses,13
1,52,William,10
2,128,,6
3,3,Henry,13


In [21]:
hi = df['age']
print(hi)
hello = [x*2 for x in hi]
print(hello)
df['bonus'] = hello
df



0    13
1    10
2     6
3    13
Name: age, dtype: int64
[26, 20, 12, 26]


Unnamed: 0,student_id,name,age,bonus
0,101,Ulysses,13,26
1,52,William,10,20
2,128,,6,12
3,3,Henry,13,26


In [17]:
df2 = pd.DataFrame(np.arange(12).reshape(3, 4),columns=['A', 'B', 'C', 'D'])
df2

Unnamed: 0,A,B,C,D
0,0,1,2,3
1,4,5,6,7
2,8,9,10,11


In [18]:

df.dropna(subset=['name'])
df

Unnamed: 0,student_id,name,age,bonus
0,101,Ulysses,13,26
1,52,William,10,20
2,128,,6,12
3,3,Henry,13,26


In [20]:
df['age'] = np.NAN
df
df['age'] = df['age'].fillna(0)
df


Unnamed: 0,student_id,name,age,bonus
0,101,Ulysses,0.0,26
1,52,William,0.0,20
2,128,,0.0,12
3,3,Henry,0.0,26


In [39]:
data = {'city':["Jacksonville","Jacksonville","Jacksonville","ElPaso","ElPaso"],
        'month':["January","February","March","January","February"],
        'temperature':[13,23,38,20,6]}

dataframe = pd.DataFrame(data)
dataframe

Unnamed: 0,city,month,temperature
0,Jacksonville,January,13
1,Jacksonville,February,23
2,Jacksonville,March,38
3,ElPaso,January,20
4,ElPaso,February,6


In [47]:
pivoted_data = dataframe.pivot_table(values='temperature',index='month', columns='city')
pivoted_data

city,ElPaso,Jacksonville
month,Unnamed: 1_level_1,Unnamed: 2_level_1
February,6.0,23.0
January,20.0,13.0
March,,38.0
