In [1]:
import pandas as pd
import numpy as np

In [2]:
# Pandas Series : One-dimensional ndarray with axis labels, == Column

# passing array
s = pd.Series([1111, 1.24542, 'Example!!!!'])
print(s)

0           1111
1        1.24542
2    Example!!!!
dtype: object


In [3]:
# passing dictionary
s = pd.Series({'A':1, 'B':2, 'C':3})
print(s)

A    1
B    2
C    3
dtype: int64


In [4]:
# Pandas DataFrame : Two-dimensional, size-mutable, potentially heterogeneous tabular data.

# passing numpy array
df = pd.DataFrame(np.random.randn(5,5))
print(df)

          0         1         2         3         4
0 -0.541069  0.112103 -0.925771  0.713033 -2.597098
1  1.128387 -1.932126  2.391212 -0.024128 -0.565479
2  0.082966 -0.544895  0.057184  0.818047  0.191551
3  0.699847  0.084577 -0.074643 -0.216888  2.258223
4 -0.154463  0.490667  0.461094 -1.254975 -0.141539


In [5]:
# passing dictionary of objects
df = pd.DataFrame({
    'A' : 1,
    'B' : np.array([6] * 5, dtype=np.int32),
    'C' : np.array([1,2,3,4,5], dtype=np.int32),
})
print(df)

   A  B  C
0  1  6  1
1  1  6  2
2  1  6  3
3  1  6  4
4  1  6  5


In [6]:
# load csv, json from existing file
# JSON
df = pd.read_json("example.json")
print(df)

      A     B       C
0  1122  3252    2325
1  1122  3252   23335
2  1122  3252    1115
3  1122  3252   67325
4  1122  3252   52325
5  1122  3252  682325
6  1122  3252  122325
7  1122  3252  262325
8  1122  3252  672325


In [7]:
# CSV
df = pd.read_csv("example.csv")
print(df)

      A       B     C     D
0  1124    5345  5464    23
1   456     536  3462  1234
2    35  234523   452   234


In [8]:
# export csv, json from dataframe
df = pd.DataFrame(np.random.randn(5,5))
df.to_csv("example2.csv")
df.to_json("example2.json")

In [10]:
# summary data
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   0       5 non-null      float64
 1   1       5 non-null      float64
 2   2       5 non-null      float64
 3   3       5 non-null      float64
 4   4       5 non-null      float64
dtypes: float64(5)
memory usage: 328.0 bytes


In [11]:
df.describe()

Unnamed: 0,0,1,2,3,4
count,5.0,5.0,5.0,5.0,5.0
mean,-0.354222,0.191799,-0.715222,0.150651,0.129677
std,0.359859,0.76805,0.39863,0.93489,0.257093
min,-0.670242,-0.613166,-1.40814,-0.914774,-0.154001
25%,-0.647958,-0.356138,-0.646457,-0.472536,-0.070156
50%,-0.463842,-0.080452,-0.602133,-0.087595,0.122736
75%,-0.169288,0.912956,-0.520423,0.941699,0.266136
max,0.18022,1.095797,-0.398956,1.286461,0.48367


In [13]:
# see columns
print(df.columns)

RangeIndex(start=0, stop=5, step=1)


In [14]:
# see data types
print(df.dtypes)

0    float64
1    float64
2    float64
3    float64
4    float64
dtype: object


In [16]:
# check duplication
df.duplicated()
# drop duplicated rows
df.drop_duplicates()

Unnamed: 0,0,1,2,3,4
0,-0.169288,-0.080452,-1.40814,-0.914774,-0.070156
1,-0.463842,-0.356138,-0.520423,0.941699,0.266136
2,0.18022,0.912956,-0.602133,-0.472536,0.122736
3,-0.670242,-0.613166,-0.646457,-0.087595,0.48367
4,-0.647958,1.095797,-0.398956,1.286461,-0.154001


In [17]:
# check null values
df.isnull().sum()

0    0
1    0
2    0
3    0
4    0
dtype: int64

In [18]:
# drop rows contained null values
df.dropna()

Unnamed: 0,0,1,2,3,4
0,-0.169288,-0.080452,-1.40814,-0.914774,-0.070156
1,-0.463842,-0.356138,-0.520423,0.941699,0.266136
2,0.18022,0.912956,-0.602133,-0.472536,0.122736
3,-0.670242,-0.613166,-0.646457,-0.087595,0.48367
4,-0.647958,1.095797,-0.398956,1.286461,-0.154001


In [19]:
# mean for Series
df.mean()

0   -0.354222
1    0.191799
2   -0.715222
3    0.150651
4    0.129677
dtype: float64

In [20]:
# cumulative sum using numpy operation
df.apply(np.cumsum)

Unnamed: 0,0,1,2,3,4
0,-0.169288,-0.080452,-1.40814,-0.914774,-0.070156
1,-0.63313,-0.436591,-1.928563,0.026924,0.19598
2,-0.45291,0.476365,-2.530696,-0.445612,0.318715
3,-1.123152,-0.136801,-3.177153,-0.533207,0.802385
4,-1.77111,0.958995,-3.57611,0.753255,0.648384


In [21]:
# get min, max
print(df.max())
print('-----------------------------------')
print(df.min())

0    0.180220
1    1.095797
2   -0.398956
3    1.286461
4    0.483670
dtype: float64
0   -0.670242
1   -0.613166
2   -1.408140
3   -0.914774
4   -0.154001
dtype: float64


In [30]:
strings = np.array(['America', 'Republic Of Korea', 'India', 'america'])
strings = pd.Series(strings)
strings

0              America
1    Republic Of Korea
2                India
3              america
dtype: object

In [37]:
## String
# to lower case
print(strings.str.lower())
print('-----------------------------------')
# to upper case
print(strings.str.upper())
print('-----------------------------------')
# swapcase
print(strings.str.swapcase())
print('-----------------------------------')
# get length
print(strings.str.len())
print('-----------------------------------')
# split string
print(strings.str.split())
print('-----------------------------------')
# unique values
print(strings.unique())
print('-----------------------------------')
# repeart values
repeats = [1, 2, 3, 4]
strings.str.repeat(repeats)
print('-----------------------------------')

0              america
1    republic of korea
2                india
3              america
dtype: object
-----------------------------------
0              AMERICA
1    REPUBLIC OF KOREA
2                INDIA
3              AMERICA
dtype: object
-----------------------------------
0              aMERICA
1    rEPUBLIC oF kOREA
2                iNDIA
3              AMERICA
dtype: object
-----------------------------------
0     7
1    17
2     5
3     7
dtype: int64
-----------------------------------
0                [America]
1    [Republic, Of, Korea]
2                  [India]
3                [america]
dtype: object
-----------------------------------
['America' 'Republic Of Korea' 'India' 'america']
-----------------------------------


0                               America
1    Republic Of KoreaRepublic Of Korea
2                       IndiaIndiaIndia
3          americaamericaamericaamerica
dtype: object