In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## Series
A Series is a one-dimensional array-like object containing a sequence of values (of
similar types to NumPy types) and an associated array of data labels, called its index.

In [6]:
# Create a Series from a plain Python list. No index is supplied, so the
# labels are the default integers 0..3.
series_obj = pd.Series(data=[4, 7, -5, 3])
series_obj

0    4
1    7
2   -5
3    3
dtype: int64

In [7]:
# Pull out the underlying data as a NumPy array. `to_numpy()` is the accessor
# the pandas documentation recommends in place of the older `.values` attribute.
series_obj.to_numpy()

array([ 4,  7, -5,  3])

In [15]:
# Replace the default integer labels with string labels by assigning to
# `.index`; this changes `series_obj` itself rather than creating a new Series.
series_obj.index = pd.Index(["aval", "dovom", "sevom", "charom"])

In [17]:
# Label-based lookup: fetch the value stored under the label "dovom".
series_obj.loc["dovom"]

7

In [19]:
# Boolean filtering: keep only the entries whose value is greater than 3
# (labels of the surviving entries are preserved).
series_obj.loc[series_obj.gt(3)]

aval     4
dovom    7
dtype: int64

In [20]:
# Apply NumPy's exponential to the whole Series; `pipe` simply calls
# np.exp(series_obj), and the result keeps the original labels.
series_obj.pipe(np.exp)

aval        54.598150
dovom     1096.633158
sevom        0.006738
charom      20.085537
dtype: float64

In [22]:
# Membership on a Series is a test against its index labels (not its values);
# checking the index directly makes that explicit.
"aval" in series_obj.index

True

In [29]:
# Label -> value mapping; 'Tehran' is stored with None (no value).
sdata = {
    'Ohio': 35000,
    'Texas': 71000,
    'Oregon': 16000,
    'Utah': 5000,
    'Tehran': None,
}
# Labels to keep, in this order. "C" is not a key of `sdata`, so its entry
# comes out as NaN in the resulting Series.
indx = ['C', 'Ohio', 'Texas']
states = pd.Series(data=sdata, index=indx)
states

C            NaN
Ohio     35000.0
Texas    71000.0
dtype: float64

In [30]:
# With no explicit index, every key of `sdata` becomes a label; the None
# stored under 'Tehran' is shown as NaN.
new_states = pd.Series(data=sdata)
new_states

Ohio      35000.0
Texas     71000.0
Oregon    16000.0
Utah       5000.0
Tehran        NaN
dtype: float64

In [31]:
# Element-wise sum aligned on labels: the result covers the union of both
# indexes, and labels found in only one of the two Series come out as NaN.
states.add(new_states)

C              NaN
Ohio       70000.0
Oregon         NaN
Tehran         NaN
Texas     142000.0
Utah           NaN
dtype: float64

In [28]:
# Replace every missing value in `states` with 2000. `fillna` is the dedicated
# pandas API for this; it returns a new Series, so the name is rebound instead
# of mutating the object through a boolean-mask assignment.
states = states.fillna(2000)
states

C         2000.0
Ohio     35000.0
Texas    71000.0
dtype: float64

## DataFrame

In [38]:
# Column-oriented sample data: each key is a column name and each list holds
# that column's six values (three rows for Ohio followed by three for Nevada).
data = dict(
    state=['Ohio'] * 3 + ['Nevada'] * 3,
    year=[2000, 2001, 2002, 2001, 2002, 2003],
    pop=[1.5, 1.7, 3.6, 2.4, 2.9, 3.2],
)

In [59]:
# Build the frame from `data`, placing the columns in the order
# year, state, pop instead of the dict's own key order.
df = pd.DataFrame(
    data=data,
    columns=['year', 'state', 'pop'],
)

In [63]:
# Rename the 'year' column to 'sali'. The result is rebound to `df` instead of
# using inplace=True: `rename` then behaves like every other pandas method
# (returns a new frame) and the call stays chainable.
df = df.rename(columns={'year': 'sali'})

In [64]:
# Preview the first five rows of the frame.
df.head(n=5)

Unnamed: 0,sali,state,pop
0,2000,Ohio,1.5
1,2001,Ohio,1.7
2,2002,Ohio,3.6
3,2001,Nevada,2.4
4,2002,Nevada,2.9


In [65]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns; the string column 'state' does not appear in the result.
df.describe()

Unnamed: 0,sali,pop
count,6.0,6.0
mean,2001.5,2.55
std,1.048809,0.836062
min,2000.0,1.5
25%,2001.0,1.875
50%,2001.5,2.65
75%,2002.0,3.125
max,2003.0,3.6


In [66]:
# Print a structural overview of the frame: index range, per-column non-null
# counts and dtypes, and approximate memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   sali    6 non-null      int64  
 1   state   6 non-null      object 
 2   pop     6 non-null      float64
dtypes: float64(1), int64(1), object(1)
memory usage: 272.0+ bytes


In [68]:
# A single column taken out of a DataFrame is a pandas Series.
type(df.loc[:, 'sali'])

pandas.core.series.Series

In [69]:
# Select the 'sali' column. Bracket access is used instead of attribute access
# (df.sali) because attribute access breaks for column names that collide with
# DataFrame attributes -- in this very frame, `df.pop` is the DataFrame.pop
# method, not the 'pop' column.
df['sali']

0    2000
1    2001
2    2002
3    2001
4    2002
5    2003
Name: sali, dtype: int64

In [70]:
# Load the iris dataset from 'iris.csv' (a relative path, so it is resolved
# against the current working directory).
iris_df = pd.read_csv(filepath_or_buffer='iris.csv')

In [71]:
# Preview the first five rows of the iris data.
iris_df.head(n=5)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,class
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [72]:
# Dimensions of the loaded frame as a (rows, columns) tuple.
iris_df.shape

(150, 5)

In [73]:
# Collect the distinct labels that occur in the 'class' column.
{label for label in iris_df['class']}

{'setosa', 'versicolor', 'virginica'}

In [77]:
# Summary statistics for the four numeric measurement columns; the string
# column 'class' does not appear in the result.
iris_df.describe()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
count,150.0,150.0,150.0,150.0
mean,5.843333,3.054,3.758667,1.198667
std,0.828066,0.433594,1.76442,0.763161
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5
