# PYTHON PANDAS PART 2

## Indexing, Selecting, Filtering

In [1]:
import pandas as pd
import numpy as np

In [2]:
object_ = pd.Series(np.arange(5), index = ["a","b","c","d","e"])
object_

a    0
b    1
c    2
d    3
e    4
dtype: int32

In [3]:
object_[0:3]

a    0
b    1
c    2
dtype: int32

In [4]:
object_[["a","c"]]

a    0
c    2
dtype: int32

In [5]:
object_.iloc[[0,2]]  # select by integer position explicitly; integer keys in plain [] on a label index are a deprecated fallback

a    0
c    2
dtype: int32

In [6]:
object_[object_<2]

a    0
b    1
dtype: int32

In [8]:
object_["a":"c"] # "c" is included

a    0
b    1
c    2
dtype: int32

In [9]:
object_["b":"c"] = 5
object_

a    0
b    5
c    5
d    3
e    4
dtype: int32

In [11]:
data = pd.DataFrame(np.arange(16).reshape(4,4), index = ["İstanbul","Paris","Roma","London"],
                    columns= ["one","two","three","four"])
data

Unnamed: 0,one,two,three,four
İstanbul,0,1,2,3
Paris,4,5,6,7
Roma,8,9,10,11
London,12,13,14,15


In [12]:
data["two"]

İstanbul     1
Paris        5
Roma         9
London      13
Name: two, dtype: int32

In [14]:
data[["one","two"]]

Unnamed: 0,one,two
İstanbul,0,1
Paris,4,5
Roma,8,9
London,12,13


In [15]:
data[:3]

Unnamed: 0,one,two,three,four
İstanbul,0,1,2,3
Paris,4,5,6,7
Roma,8,9,10,11


In [17]:
data[data["four"]>5]

Unnamed: 0,one,two,three,four
Paris,4,5,6,7
Roma,8,9,10,11
London,12,13,14,15


In [18]:
data[data<5]= 0
data

Unnamed: 0,one,two,three,four
İstanbul,0,0,0,0
Paris,0,5,6,7
Roma,8,9,10,11
London,12,13,14,15


In [19]:
data.iloc[1]  # Selects the row at integer position 1, i.e. the second row ("Paris").

one      0
two      5
three    6
four     7
Name: Paris, dtype: int32

In [20]:
data.iloc[1,[1,2,3]]  # Row at position 1, restricted to the columns at positions 1, 2 and 3.

two      5
three    6
four     7
Name: Paris, dtype: int32

In [21]:
data.iloc[[1,2],[1,2,3]] 

Unnamed: 0,two,three,four
Paris,5,6,7
Roma,9,10,11


In [22]:
data.loc["Roma",["two","four"]]

two      9
four    11
Name: Roma, dtype: int32

In [23]:
data.loc[:"Roma",["two","four"]]

Unnamed: 0,two,four
İstanbul,0,0
Paris,5,7
Roma,9,11


In [24]:
object_1 = pd.Series(np.arange(5))
object_1

0    0
1    1
2    2
3    3
4    4
dtype: int32

In [25]:
object_1[-1] # With the default integer index, [] looks -1 up as a label; no such label exists, so a KeyError is raised.

KeyError: -1

In [27]:
object_1 = pd.Series(np.arange(5), index = ["a","b","c","d","e"])
object_1.iloc[-1] # position -1 is the last element; .iloc makes the positional lookup explicit on a label index

4

## Important Methods

In [28]:
s = pd.Series([1,2,3,4], index = ["a","b","c","d"])
s

a    1
b    2
c    3
d    4
dtype: int64

In [29]:
s2 = s.reindex(["b","c","a","d","e"])
s2

b    2.0
c    3.0
a    1.0
d    4.0
e    NaN
dtype: float64

In [30]:
s3 = pd.Series(["blue","yellow","purple"], index = [0,2,4])
s3

0      blue
2    yellow
4    purple
dtype: object

In [33]:
s3.reindex(range(6), method = 'ffill') # Forward fill: each new label takes the value of the closest preceding existing label.

0      blue
1      blue
2    yellow
3    yellow
4    purple
5    purple
dtype: object

In [34]:
data = pd.DataFrame(np.arange(9).reshape(3,3), index = ["a","c","d"],
                    columns= ["Erdem","Joe","Jack"])
data

Unnamed: 0,Erdem,Joe,Jack
a,0,1,2
c,3,4,5
d,6,7,8


In [35]:
data1 = data.reindex(["d","c","b","a"])
data1

Unnamed: 0,Erdem,Joe,Jack
d,6.0,7.0,8.0
c,3.0,4.0,5.0
b,,,
a,0.0,1.0,2.0


In [36]:
name = ["Erdem","Angela","Jack"]
data.reindex(columns= name)

Unnamed: 0,Erdem,Angela,Jack
a,0,,2
c,3,,5
d,6,,8


In [38]:
data.loc[["c","d","a"]]

Unnamed: 0,Erdem,Joe,Jack
c,3,4,5
d,6,7,8
a,0,1,2


In [39]:
s = pd.Series(np.arange(5), index = ["a","b","c","d","e"])
s

a    0
b    1
c    2
d    3
e    4
dtype: int32

In [40]:
s1 = s.drop("b")
s1

a    0
c    2
d    3
e    4
dtype: int32

In [42]:
s.drop(["c","d"])

a    0
b    1
e    4
dtype: int32

In [44]:
data = pd.DataFrame(np.arange(16).reshape(4,4), index = ["Erdem","Joe","Jack","Angela"],
                    columns= list("ABCD"))
data

Unnamed: 0,A,B,C,D
Erdem,0,1,2,3
Joe,4,5,6,7
Jack,8,9,10,11
Angela,12,13,14,15


In [45]:
data.drop(["Joe","Jack"])

Unnamed: 0,A,B,C,D
Erdem,0,1,2,3
Angela,12,13,14,15


In [46]:
data.drop("A", axis=1)

Unnamed: 0,B,C,D
Erdem,1,2,3
Joe,5,6,7
Jack,9,10,11
Angela,13,14,15


In [47]:
data.drop("Angela", axis=0)

Unnamed: 0,A,B,C,D
Erdem,0,1,2,3
Joe,4,5,6,7
Jack,8,9,10,11


In [49]:
data.mean()

A    6.0
B    7.0
C    8.0
D    9.0
dtype: float64

In [51]:
data.mean(axis=0) # rows averaged for each column

A    6.0
B    7.0
C    8.0
D    9.0
dtype: float64

In [52]:
data.mean(axis=1) # average of columns for each row

Erdem      1.5
Joe        5.5
Jack       9.5
Angela    13.5
dtype: float64

## Arithmetic Operations

In [53]:
s1 = pd.Series(np.arange(4), index = ["a","c","d","e"])
s2 = pd.Series(np.arange(5), index = ["a","c","e","f","g"])

print(s1)
print(s2)

a    0
c    1
d    2
e    3
dtype: int32
a    0
c    1
e    2
f    3
g    4
dtype: int32


In [55]:
s1+s2 # Values whose index labels match are added; a label present in only one Series gives NaN.

a    0.0
c    2.0
d    NaN
e    5.0
f    NaN
g    NaN
dtype: float64

In [57]:
# Two frames whose row labels and column labels only partially overlap,
# used below to show how arithmetic aligns on both axes.
df1 = pd.DataFrame(np.arange(6).reshape(2,3), index = ["Erdem","Joe"],
                    columns= list("ABC"))
# Only df2 is bound here, so the earlier `data` frame is not overwritten by accident.
df2 = pd.DataFrame(np.arange(9).reshape(3,3), index = ["Erdem","Joe","Jack"],
                    columns= list("ACD"))

print(df1)
print(df2)

       A  B  C
Erdem  0  1  2
Joe    3  4  5
       A  C  D
Erdem  0  1  2
Joe    3  4  5
Jack   6  7  8


In [60]:
df1+df2 

# The summation operation is performed for rows and columns with matching values.
# For those that do not match, a null value is returned.


Unnamed: 0,A,B,C,D
Erdem,0.0,,3.0,
Jack,,,,
Joe,6.0,,9.0,


In [64]:
df1.add(df2, fill_value = 0) # A value missing from only one frame is treated as 0; a cell missing from both (Jack, B) stays NaN.

Unnamed: 0,A,B,C,D
Erdem,0.0,1.0,3.0,2.0
Jack,6.0,,7.0,8.0
Joe,6.0,4.0,9.0,5.0


In [65]:
1/df1 # Element-wise reciprocal (multiplicative inverse); the 0 entry gives inf.

Unnamed: 0,A,B,C
Erdem,inf,1.0,0.5
Joe,0.333333,0.25,0.2


In [66]:
print(df1*3)
print(df1+3)
print(df1/3)
print(df1-3)

       A   B   C
Erdem  0   3   6
Joe    9  12  15
       A  B  C
Erdem  3  4  5
Joe    6  7  8
         A         B         C
Erdem  0.0  0.333333  0.666667
Joe    1.0  1.333333  1.666667
       A  B  C
Erdem -3 -2 -1
Joe    0  1  2


In [69]:
print(df1.mul(3))
print(df1.sub(3))
print(df1.div(3))
print(df1.pow(3))

       A   B   C
Erdem  0   3   6
Joe    9  12  15
       A  B  C
Erdem -3 -2 -1
Joe    0  1  2
         A         B         C
Erdem  0.0  0.333333  0.666667
Joe    1.0  1.333333  1.666667
        A   B    C
Erdem   0   1    8
Joe    27  64  125


In [71]:
s = df2.iloc[1]
print(s)
print(df2-s)

A    3
C    4
D    5
Name: Joe, dtype: int32
       A  C  D
Erdem -3 -3 -3
Joe    0  0  0
Jack   3  3  3


In [74]:
s1 = df2["A"]
print(s1)
print(df2.sub(s1, axis = "index")) # s1 is matched on the row labels and subtracted from every column of df2.

Erdem    0
Joe      3
Jack     6
Name: A, dtype: int32
       A  C  D
Erdem  0  1  2
Joe    0  1  2
Jack   0  1  2


In [None]:
## Function Application

In [89]:
data = pd.DataFrame(np.random.randn(4,3), index = ["Erdem","Joe","Jack","Angela"],
                    columns= list("ABC"))
data

Unnamed: 0,A,B,C
Erdem,0.255995,-0.073021,-0.122401
Joe,0.612635,0.373669,0.013067
Jack,0.803116,-0.783877,-1.02373
Angela,0.675618,-0.905072,0.487291


In [90]:
data.abs()

Unnamed: 0,A,B,C
Erdem,0.255995,0.073021,0.122401
Joe,0.612635,0.373669,0.013067
Jack,0.803116,0.783877,1.02373
Angela,0.675618,0.905072,0.487291


In [91]:
def func(x):
    """Return the spread of x: its largest value minus its smallest."""
    return x.max() - x.min()

In [92]:
data.apply(func)

A    0.547121
B    1.278741
C    1.511021
dtype: float64

In [93]:
def func1(x):
    """Return the range of x (max minus min) divided by its mean."""
    spread = x.max() - x.min()
    return spread / x.mean()

In [94]:
data.apply(func1)

A    0.932316
B   -3.684337
C   -9.359460
dtype: float64

In [95]:
func2 = lambda x:(x.max()-x.min())/x.count()
data.apply(func1)

A    0.932316
B   -3.684337
C   -9.359460
dtype: float64

In [96]:
data.apply(func1, axis=1)

Erdem     18.740826
Joe        1.799835
Jack      -5.456035
Angela    18.391753
dtype: float64

In [98]:
def f(x):
    """Return x raised to the second power (element-wise for pandas objects)."""
    return pow(x, 2)

data.apply(f)

Unnamed: 0,A,B,C
Erdem,0.065533,0.005332,0.014982
Joe,0.375321,0.139629,0.000171
Jack,0.644995,0.614463,1.048022
Angela,0.456459,0.819155,0.237452


## Sort

In [100]:
s = pd.Series(np.arange(5), index = ["c","g","e","a","f"])
print(s)
print(s.sort_index())

c    0
g    1
e    2
a    3
f    4
dtype: int32
a    3
c    0
e    2
f    4
g    1
dtype: int32


In [104]:
df = pd.DataFrame(np.arange(12).reshape(3,4), index = ["two","one","three"],
                    columns= ["d","a","b","c"])
print(df)
print(df.sort_index()) # sort by index
print(df.sort_index(axis=1)) # sort by columns
print(df.sort_index(axis=1, ascending=False)) # desc

       d  a   b   c
two    0  1   2   3
one    4  5   6   7
three  8  9  10  11
       d  a   b   c
one    4  5   6   7
three  8  9  10  11
two    0  1   2   3
       a   b   c  d
two    1   2   3  0
one    5   6   7  4
three  9  10  11  8
       d   c   b  a
two    0   3   2  1
one    4   7   6  5
three  8  11  10  9


In [108]:
s2 = pd.Series([5,np.nan,3,-1,9])
print(s2)
print(s2.sort_values())

0    5.0
1    NaN
2    3.0
3   -1.0
4    9.0
dtype: float64
3   -1.0
2    3.0
0    5.0
4    9.0
1    NaN
dtype: float64


In [112]:
df2 = pd.DataFrame({"a":[3,2,-6,7],
                    "b":[5,-4,8,2]})

print(df2)
print(df2.sort_values(by="b")) # sort by b
print(df2.sort_values(by=["b","a"])) # sort by b first, then by a to break ties

   a  b
0  3  5
1  2 -4
2 -6  8
3  7  2
   a  b
1  2 -4
3  7  2
0  3  5
2 -6  8
   a  b
1  2 -4
3  7  2
0  3  5
2 -6  8


In [113]:
games = pd.read_csv("vgsales.csv") #data sets = https://www.kaggle.com/datasets/gregorut/videogamesales
games

Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
0,1,Wii Sports,Wii,2006.0,Sports,Nintendo,41.49,29.02,3.77,8.46,82.74
1,2,Super Mario Bros.,NES,1985.0,Platform,Nintendo,29.08,3.58,6.81,0.77,40.24
2,3,Mario Kart Wii,Wii,2008.0,Racing,Nintendo,15.85,12.88,3.79,3.31,35.82
3,4,Wii Sports Resort,Wii,2009.0,Sports,Nintendo,15.75,11.01,3.28,2.96,33.00
4,5,Pokemon Red/Pokemon Blue,GB,1996.0,Role-Playing,Nintendo,11.27,8.89,10.22,1.00,31.37
...,...,...,...,...,...,...,...,...,...,...,...
16593,16596,Woody Woodpecker in Crazy Castle 5,GBA,2002.0,Platform,Kemco,0.01,0.00,0.00,0.00,0.01
16594,16597,Men in Black II: Alien Escape,GC,2003.0,Shooter,Infogrames,0.01,0.00,0.00,0.00,0.01
16595,16598,SCORE International Baja 1000: The Official Game,PS2,2008.0,Racing,Activision,0.00,0.00,0.00,0.00,0.01
16596,16599,Know How 2,DS,2010.0,Puzzle,7G//AMES,0.00,0.01,0.00,0.00,0.01


In [114]:
games.Name.sort_values()

4754                                 '98 Koshien
8357                  .hack//G.U. Vol.1//Rebirth
7107                .hack//G.U. Vol.2//Reminisce
8602     .hack//G.U. Vol.2//Reminisce (jp sales)
8304               .hack//G.U. Vol.3//Redemption
                          ...                   
627                                 uDraw Studio
7835                uDraw Studio: Instant Artist
15523               uDraw Studio: Instant Artist
470                   wwe Smackdown vs. Raw 2006
9135                ¡Shin Chan Flipa en colores!
Name: Name, Length: 16598, dtype: object

In [115]:
games["Name"].sort_values()

4754                                 '98 Koshien
8357                  .hack//G.U. Vol.1//Rebirth
7107                .hack//G.U. Vol.2//Reminisce
8602     .hack//G.U. Vol.2//Reminisce (jp sales)
8304               .hack//G.U. Vol.3//Redemption
                          ...                   
627                                 uDraw Studio
7835                uDraw Studio: Instant Artist
15523               uDraw Studio: Instant Artist
470                   wwe Smackdown vs. Raw 2006
9135                ¡Shin Chan Flipa en colores!
Name: Name, Length: 16598, dtype: object

In [116]:
type(games["Name"].sort_values())

pandas.core.series.Series

In [118]:
games["Name"].sort_values( ascending=False)

9135                ¡Shin Chan Flipa en colores!
470                   wwe Smackdown vs. Raw 2006
15523               uDraw Studio: Instant Artist
7835                uDraw Studio: Instant Artist
627                                 uDraw Studio
                          ...                   
8304               .hack//G.U. Vol.3//Redemption
8602     .hack//G.U. Vol.2//Reminisce (jp sales)
7107                .hack//G.U. Vol.2//Reminisce
8357                  .hack//G.U. Vol.1//Rebirth
4754                                 '98 Koshien
Name: Name, Length: 16598, dtype: object

In [119]:
games.sort_values("Name") # Sorts all data by relevant column.

Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
4754,4756,'98 Koshien,PS,1998.0,Sports,Magical Company,0.15,0.10,0.12,0.03,0.41
8357,8359,.hack//G.U. Vol.1//Rebirth,PS2,2006.0,Role-Playing,Namco Bandai Games,0.00,0.00,0.17,0.00,0.17
7107,7109,.hack//G.U. Vol.2//Reminisce,PS2,2006.0,Role-Playing,Namco Bandai Games,0.11,0.09,0.00,0.03,0.23
8602,8604,.hack//G.U. Vol.2//Reminisce (jp sales),PS2,2006.0,Role-Playing,Namco Bandai Games,0.00,0.00,0.16,0.00,0.16
8304,8306,.hack//G.U. Vol.3//Redemption,PS2,2007.0,Role-Playing,Namco Bandai Games,0.00,0.00,0.17,0.00,0.17
...,...,...,...,...,...,...,...,...,...,...,...
627,628,uDraw Studio,Wii,2010.0,Misc,THQ,1.67,0.58,0.00,0.20,2.46
7835,7837,uDraw Studio: Instant Artist,Wii,2011.0,Misc,THQ,0.08,0.09,0.00,0.02,0.19
15523,15526,uDraw Studio: Instant Artist,X360,2011.0,Misc,THQ,0.01,0.01,0.00,0.00,0.02
470,471,wwe Smackdown vs. Raw 2006,PS2,,Fighting,,1.57,1.02,0.00,0.41,3.00


In [120]:
games.sort_values("Year")

Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
6896,6898,Checkers,2600,1980.0,Misc,Atari,0.22,0.01,0.0,0.00,0.24
2669,2671,Boxing,2600,1980.0,Fighting,Activision,0.72,0.04,0.0,0.01,0.77
5366,5368,Freeway,2600,1980.0,Action,Activision,0.32,0.02,0.0,0.00,0.34
1969,1971,Defender,2600,1980.0,Misc,Atari,0.99,0.05,0.0,0.01,1.05
1766,1768,Kaboom!,2600,1980.0,Misc,Activision,1.07,0.07,0.0,0.01,1.15
...,...,...,...,...,...,...,...,...,...,...,...
16307,16310,Freaky Flyers,GC,,Racing,Unknown,0.01,0.00,0.0,0.00,0.01
16327,16330,Inversion,PC,,Shooter,Namco Bandai Games,0.01,0.00,0.0,0.00,0.01
16366,16369,Hakuouki: Shinsengumi Kitan,PS3,,Adventure,Unknown,0.01,0.00,0.0,0.00,0.01
16427,16430,Virtua Quest,GC,,Role-Playing,Unknown,0.01,0.00,0.0,0.00,0.01


In [121]:
games.sort_values(["Year","Name"])

Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
258,259,Asteroids,2600,1980.0,Shooter,Atari,4.00,0.26,0.00,0.05,4.31
2669,2671,Boxing,2600,1980.0,Fighting,Activision,0.72,0.04,0.00,0.01,0.77
6317,6319,Bridge,2600,1980.0,Misc,Activision,0.25,0.02,0.00,0.00,0.27
6896,6898,Checkers,2600,1980.0,Misc,Atari,0.22,0.01,0.00,0.00,0.24
1969,1971,Defender,2600,1980.0,Misc,Atari,0.99,0.05,0.00,0.01,1.05
...,...,...,...,...,...,...,...,...,...,...,...
7351,7353,Yu Yu Hakusho: Dark Tournament,PS2,,Fighting,,0.10,0.08,0.00,0.03,0.21
15476,15479,Yu-Gi-Oh! 5D's Wheelie Breakers (JP sales),Wii,,Racing,Unknown,0.00,0.00,0.02,0.00,0.02
11409,11411,Zero: Tsukihami no Kamen,Wii,,Action,Nintendo,0.00,0.00,0.08,0.00,0.08
8899,8901,eJay Clubworld,PS2,,Misc,Empire Interactive,0.07,0.06,0.00,0.02,0.15
