In [3]:
import numpy as np
import pandas as pd

In [4]:
# Small demo frame used by the cells below: an integer counter, an integer
# column with repeated values, and one name per row.
df = pd.DataFrame(
    data={
        "Column1": list(range(1, 7)),
        "Column2": [100, 100, 200, 300, 300, 100],
        "Column3": ["Mustafa", "Kemal", "Tom", "Eva", "Janette", "Freddy"],
    }
)

In [5]:
df

Unnamed: 0,Column1,Column2,Column3
0,1,100,Mustafa
1,2,100,Kemal
2,3,200,Tom
3,4,300,Eva
4,5,300,Janette
5,6,100,Freddy


***
## Operations:

### .head():
• Returns the first *n* rows of a dataframe (5 rows by default).

In [6]:
# head() defaults to n=5 (visible in the Shift+Tab signature), so this returns the first 5 rows of the dataframe.
df.head()

Unnamed: 0,Column1,Column2,Column3
0,1,100,Mustafa
1,2,100,Kemal
2,3,200,Tom
3,4,300,Eva
4,5,300,Janette


In [7]:
df.head(n=3)  # Passing n=3 returns only the first 3 rows

Unnamed: 0,Column1,Column2,Column3
0,1,100,Mustafa
1,2,100,Kemal
2,3,200,Tom


***
### .unique() and .nunique():
• **unique() :** Returns the distinct values found in a column (or in a row).\
• **nunique() :** Returns the number of distinct values.

In [8]:
# unique() returns the distinct values themselves (as an array), not how many there are.
df["Column2"].unique()

array([100, 200, 300], dtype=int64)

In [9]:
# Distinct values across the row labelled 0; the row mixes numbers and text, hence dtype=object in the output.
df.loc[0].unique()

array([1, 100, 'Mustafa'], dtype=object)

In [10]:
df.nunique()  # Number of distinct values in every column

Column1    6
Column2    3
Column3    6
dtype: int64

In [11]:
df["Column2"].nunique()  # Number of distinct values in "Column2" only

3

***
### .value_counts():
• Returns how many times each distinct value occurs in a column.

In [12]:
# Counts the occurrences of each distinct value in "Column2"; the output lists the most frequent value first.
df["Column2"].value_counts()

100    3
300    2
200    1
Name: Column2, dtype: int64

***
### Filtering: 
• Let's select all rows where **Column1 is greater than or equal to 2** and **Column2 is equal to 100**:

In [13]:
df

Unnamed: 0,Column1,Column2,Column3
0,1,100,Mustafa
1,2,100,Kemal
2,3,200,Tom
3,4,300,Eva
4,5,300,Janette
5,6,100,Freddy


In [14]:
# Combine both conditions element-wise with "&". Each comparison needs its own
# parentheses because "&" binds more tightly than ">=" and "==".
df[(df["Column1"] >= 2) & (df["Column2"] == 100)]

Unnamed: 0,Column1,Column2,Column3
1,2,100,Kemal
5,6,100,Freddy


***
### .apply():
• We can run a function on every value of a dataframe column with **"apply"**.\
• Let's create a function first.

In [15]:
def times3(x):
    """Return ``x`` multiplied by 3."""
    tripled = x * 3
    return tripled

In [16]:
df["Column2"]  # Current values of "Column2"; next we multiply them with the times3 function

0    100
1    100
2    200
3    300
4    300
5    100
Name: Column2, dtype: int64

In [17]:
# apply() calls times3 on every value and returns the results as a new Series; df itself is not modified here.
df["Column2"].apply(times3)

0    300
1    300
2    600
3    900
4    900
5    300
Name: Column2, dtype: int64

In [18]:
# Assign the result back to the column to persist the change in the dataframe.
# Note: this overwrites "Column2", so re-running this cell triples the values again.
df["Column2"] = df["Column2"].apply(times3)

In [19]:
df  # "Column2" now holds the tripled values

Unnamed: 0,Column1,Column2,Column3
0,1,300,Mustafa
1,2,300,Kemal
2,3,600,Tom
3,4,900,Eva
4,5,900,Janette
5,6,300,Freddy


**Note:** You can use lambda as well.\
_Example:_ 

In [20]:
# The same operation written with an anonymous (lambda) function. The values shown are tripled a
# second time because "Column2" was already overwritten with the times3 result in an earlier cell.
df["Column2"].apply(lambda x : x * 3)

0     900
1     900
2    1800
3    2700
4    2700
5     900
Name: Column2, dtype: int64

#### Let's use Python's built-in functions:

In [21]:
df["Column3"]  # All "Column3" values, which are strings

0    Mustafa
1      Kemal
2        Tom
3        Eva
4    Janette
5     Freddy
Name: Column3, dtype: object

In [22]:
df["Column3"].apply(len)  # The built-in len is called on each string, giving its length

0    7
1    5
2    3
3    3
4    7
5    6
Name: Column3, dtype: int64

### .columns and .index:
• **.columns :** Returns the column names.\
• **.index :** Returns the row labels; for the default index it shows where they start and stop, and the step between them.

In [25]:
df

Unnamed: 0,Column1,Column2,Column3
0,1,300,Mustafa
1,2,300,Kemal
2,3,600,Tom
3,4,900,Eva
4,5,900,Janette
5,6,300,Freddy


In [26]:
df.columns  # Column labels of the dataframe

Index(['Column1', 'Column2', 'Column3'], dtype='object')

In [27]:
df.index  # Row labels; the output shows a RangeIndex with its start, stop and step

RangeIndex(start=0, stop=6, step=1)

In [29]:
len(df.index)  # Number of rows in the dataframe

6

### .sort_values():
• Sorts the rows by the values of the given column, from smallest to largest by default.\
• If you press Shift+Tab you will see the default parameter **"ascending = True"**, which means the values are sorted from smallest to largest. If you change it to **"False"**, the values are sorted from largest to smallest.

In [31]:
df

Unnamed: 0,Column1,Column2,Column3
0,1,300,Mustafa
1,2,300,Kemal
2,3,600,Tom
3,4,900,Eva
4,5,900,Janette
5,6,300,Freddy


In [32]:
df.sort_values("Column2")  # Ascending order (the default): smallest to largest

Unnamed: 0,Column1,Column2,Column3
0,1,300,Mustafa
1,2,300,Kemal
5,6,300,Freddy
2,3,600,Tom
3,4,900,Eva
4,5,900,Janette


In [33]:
df.sort_values("Column2", ascending = False)  # Descending order: largest to smallest

Unnamed: 0,Column1,Column2,Column3
3,4,900,Eva
4,5,900,Janette
2,3,600,Tom
0,1,300,Mustafa
1,2,300,Kemal
5,6,300,Freddy


***
## Creating Pivot Table:

In [45]:
# Long-format humidity readings: one row per (month, city) observation.
df = pd.DataFrame(
    data=[
        ("March", "Ankara", 10),
        ("April", "Ankara", 25),
        ("May", "Ankara", 50),
        ("March", "İstanbul", 21),
        ("April", "İstanbul", 67),
        ("May", "İstanbul", 80),
        ("March", "İzmir", 30),
        ("April", "İzmir", 70),
        ("May", "İzmir", 75),
    ],
    columns=["Month", "City", "Humidity"],
)

In [46]:
df

Unnamed: 0,Month,City,Humidity
0,March,Ankara,10
1,April,Ankara,25
2,May,Ankara,50
3,March,İstanbul,21
4,April,İstanbul,67
5,May,İstanbul,80
6,March,İzmir,30
7,April,İzmir,70
8,May,İzmir,75


Let's create a pivot table that uses the **month names as the index (row labels)** and the **city names as the columns:**

In [50]:
# index -> row labels, columns -> column labels, values -> the column that fills the cells
# (all three parameters are listed in the Shift+Tab signature).
# NOTE: pivot_table aggregates duplicate (index, column) pairs with the mean by default;
# here every (Month, City) pair occurs exactly once, so the humidity values appear unchanged.
df.pivot_table(index = "Month", columns = "City", values = "Humidity")

City,Ankara,İstanbul,İzmir
Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
April,25,67,70
March,10,21,30
May,50,80,75


As you can see, the pivot table gives us a more readable dataframe; it is similar to Excel pivot tables.

##### Let's do it the other way around:

In [53]:
df.pivot_table(index = "City", columns = "Month", values = "Humidity")  # Swapped: cities are now the row labels and months are the columns

Month,April,March,May
City,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Ankara,25,10,50
İstanbul,67,21,80
İzmir,70,30,75
