<a href="https://colab.research.google.com/github/AzadMehedi/Pandas/blob/main/10_minutes_to_pandas_practice.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np

# Object creation: build a Series from a list of values. pandas assigns a
# default integer index, and np.nan marks the missing entry.
s = pd.Series([1, 3, 5, np.nan, 6, 8])
s


0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [None]:
# Datetime index of six consecutive days starting 2013-01-01; it is used below
# as the row index when creating a DataFrame from a NumPy array.
dates = pd.date_range(start='20130101', periods=6)
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [None]:
# 6x4 frame of standard-normal samples, indexed by `dates`, with columns A-D.
# A seeded generator is used so that the values are identical on every
# Restart & Run All instead of changing with each execution.
df = pd.DataFrame(
    np.random.default_rng(0).standard_normal((6, 4)),
    index=dates,
    columns=list('ABCD'),
)
df

Unnamed: 0,A,B,C,D
2013-01-01,1.472144,1.962587,-0.505662,-0.136562
2013-01-02,1.073427,-1.581209,0.012019,-1.232952
2013-01-03,-0.055263,-0.747325,-0.885778,-0.141106
2013-01-04,0.991484,0.229064,0.798565,-0.846814
2013-01-05,0.297961,1.310868,-0.230209,0.345082
2013-01-06,0.180598,0.901565,-2.296566,0.44079


In [None]:
# Creating a DataFrame by passing a dictionary of objects that can be converted
# into a series-like structure. Each key becomes a column; scalar values
# ('A', 'B', 'F') are repeated on every row, while the Series, array and
# Categorical supply four values each.
# (pandas and numpy are already imported at the top of the notebook.)
df2 = pd.DataFrame({
    'A': 1.0,
    'B': pd.Timestamp('20220101'),
    'C': pd.Series(1, index=list(range(4)), dtype='float32'),
    'D': np.array([3] * 4, dtype='int32'),
    'E': pd.Categorical(['test', 'train', 'test', 'train']),
    'F': 'foo',
})
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2022-01-01,1.0,3,test,foo
1,1.0,2022-01-01,1.0,3,train,foo
2,1.0,2022-01-01,1.0,3,test,foo
3,1.0,2022-01-01,1.0,3,train,foo


In [None]:
# The columns of the resulting DataFrame have different dtypes; `.dtypes`
# lists one dtype per column.
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

# Viewing data

In [None]:
# Here is how to view the top and bottom rows of the frame.

# Location of the goals CSV, kept in one named constant so it can be changed
# in a single place.
# NOTE(review): this is an absolute path on the original author's Windows
# machine; point it at your own copy of data.csv before running elsewhere
# (for example on Colab).
DATA_PATH = r'C:\Users\azadm\Downloads\dataset\data.csv'

# NOTE: this rebinds `df`, replacing the random frame created earlier.
df = pd.read_csv(DATA_PATH)
df.head(10)  # first ten rows

Unnamed: 0.1,Unnamed: 0,Goal_no,Season,Competition,Matchday,Venue,Team,Opponent,Result,Position,Minute,At_score,Type_of_goal
0,0,1,2/3,Liga Portugal,6,H,Sporting CP,Moreirense FC,3:0,LW,34,2:0,Solo run
1,1,2,2/3,Liga Portugal,6,H,Sporting CP,Moreirense FC,3:0,,90+5,3:0,Header
2,2,3,2/3,Liga Portugal,8,A,Sporting CP,Boavista FC,1:2,,88,1:2,Right-footed shot
3,3,4,2/3,TaÃ§a de Portugal Placard,Fourth Round,H,Sporting CP,CD Estarreja,4:1,,67,3:0,Left-footed shot
4,4,5,2/3,TaÃ§a de Portugal Placard,Fifth Round,H,Sporting CP,FC Oliveira do Hospital,8:1,,13,3:0,
5,5,6,3/4,Premier League,11,H,Manchester United,Portsmouth FC,3:0,RW,80,2:0,Direct free kick
6,6,7,3/4,FA Cup,Fifth Round,H,Manchester United,Manchester City,4:2,RW,74,3:0,Right-footed shot
7,7,8,3/4,Premier League,29,H,Manchester United,Tottenham Hotspur,3:0,,89,2:0,Right-footed shot
8,8,9,3/4,Premier League,32,A,Manchester United,Birmingham City,1:2,,60,1:1,Header
9,9,10,3/4,Premier League,38,A,Manchester United,Aston Villa,0:2,,4,0:1,Right-footed shot


In [None]:
# View the bottom rows of the frame; the argument sets how many rows are
# returned (seven here).
df.tail(7)

Unnamed: 0.1,Unnamed: 0,Goal_no,Season,Competition,Matchday,Venue,Team,Opponent,Result,Position,Minute,At_score,Type_of_goal
691,691,692,21/22,Premier League,29,H,Manchester United,Tottenham Hotspur,3:2,CF,81,3:2,Header
692,692,693,21/22,Premier League,33,H,Manchester United,Norwich City,3:2,CF,7,1:0,Right-footed shot
693,693,694,21/22,Premier League,33,H,Manchester United,Norwich City,3:2,CF,32,2:0,Header
694,694,695,21/22,Premier League,33,H,Manchester United,Norwich City,3:2,CF,76,3:2,Direct free kick
695,695,696,21/22,Premier League,34,A,Manchester United,Arsenal FC,3:1,CF,34,2:1,Left-footed shot
696,696,697,21/22,Premier League,37,H,Manchester United,Chelsea FC,1:1,CF,62,1:1,Right-footed shot
697,697,698,21/22,Premier League,35,H,Manchester United,Brentford FC,3:0,CF,61,2:0,Penalty


In [None]:
# Display the row index of the frame (the column labels are shown in the
# next cell).
df.index

RangeIndex(start=0, stop=698, step=1)

In [None]:
# Display the column labels of the frame.
df.columns

Index(['Unnamed: 0', 'Goal_no', 'Season', 'Competition', 'Matchday', 'Venue',
       'Team', 'Opponent', 'Result', 'Position', 'Minute', 'At_score',
       'Type_of_goal'],
      dtype='object')

In [None]:
# NumPy arrays have one dtype for the entire array, while pandas DataFrames
# have one dtype per column. When you call DataFrame.to_numpy(), pandas finds
# the NumPy dtype that can hold all of the dtypes in the DataFrame. This may
# end up being object, which requires casting every value to a Python object.
df.to_numpy()

array([[0, 1, '2/3', ..., '34', '2:0', 'Solo run'],
       [1, 2, '2/3', ..., '90+5', '3:0', 'Header'],
       [2, 3, '2/3', ..., '88', '1:2', 'Right-footed shot'],
       ...,
       [695, 696, '21/22', ..., '34', '2:1', 'Left-footed shot'],
       [696, 697, '21/22', ..., '62', '1:1', 'Right-footed shot'],
       [697, 698, '21/22', ..., '61', '2:0', 'Penalty']], dtype=object)

In [None]:
# describe() shows a quick statistical summary of your data
# (count, mean, std, min, quartiles, max).
df.describe()

Unnamed: 0.1,Unnamed: 0,Goal_no
count,698.0,698.0
mean,348.5,349.5
std,201.63953,201.63953
min,0.0,1.0
25%,174.25,175.25
50%,348.5,349.5
75%,522.75,523.75
max,697.0,698.0


In [None]:
# Transposing your data: the rows become columns and the columns become rows.
df.T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,688,689,690,691,692,693,694,695,696,697
Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,688,689,690,691,692,693,694,695,696,697
Goal_no,1,2,3,4,5,6,7,8,9,10,...,689,690,691,692,693,694,695,696,697,698
Season,2/3,2/3,2/3,2/3,2/3,3/4,3/4,3/4,3/4,3/4,...,21/22,21/22,21/22,21/22,21/22,21/22,21/22,21/22,21/22,21/22
Competition,Liga Portugal,Liga Portugal,Liga Portugal,TaÃ§a de Portugal Placard,TaÃ§a de Portugal Placard,Premier League,FA Cup,Premier League,Premier League,Premier League,...,Premier League,Premier League,Premier League,Premier League,Premier League,Premier League,Premier League,Premier League,Premier League,Premier League
Matchday,6,6,8,Fourth Round,Fifth Round,11,Fifth Round,29,32,38,...,18,29,29,29,33,33,33,34,37,35
Venue,H,H,A,H,H,H,H,H,A,A,...,H,H,H,H,H,H,H,A,H,H
Team,Sporting CP,Sporting CP,Sporting CP,Sporting CP,Sporting CP,Manchester United,Manchester United,Manchester United,Manchester United,Manchester United,...,Manchester United,Manchester United,Manchester United,Manchester United,Manchester United,Manchester United,Manchester United,Manchester United,Manchester United,Manchester United
Opponent,Moreirense FC,Moreirense FC,Boavista FC,CD Estarreja,FC Oliveira do Hospital,Portsmouth FC,Manchester City,Tottenham Hotspur,Birmingham City,Aston Villa,...,Brighton & Hove Albion,Tottenham Hotspur,Tottenham Hotspur,Tottenham Hotspur,Norwich City,Norwich City,Norwich City,Arsenal FC,Chelsea FC,Brentford FC
Result,3:0,3:0,1:2,4:1,8:1,3:0,4:2,3:0,1:2,0:2,...,2:0,3:2,3:2,3:2,3:2,3:2,3:2,3:1,1:1,3:0
Position,LW,,,,,RW,RW,,,,...,CF,CF,CF,CF,CF,CF,CF,CF,CF,CF


In [None]:
# Sorting by an axis: axis=1 sorts by the column labels, and ascending=False
# puts them in descending order.
df.sort_index(axis=1, ascending=False)

Unnamed: 0.1,Venue,Unnamed: 0,Type_of_goal,Team,Season,Result,Position,Opponent,Minute,Matchday,Goal_no,Competition,At_score
0,H,0,Solo run,Sporting CP,2/3,3:0,LW,Moreirense FC,34,6,1,Liga Portugal,2:0
1,H,1,Header,Sporting CP,2/3,3:0,,Moreirense FC,90+5,6,2,Liga Portugal,3:0
2,A,2,Right-footed shot,Sporting CP,2/3,1:2,,Boavista FC,88,8,3,Liga Portugal,1:2
3,H,3,Left-footed shot,Sporting CP,2/3,4:1,,CD Estarreja,67,Fourth Round,4,TaÃ§a de Portugal Placard,3:0
4,H,4,,Sporting CP,2/3,8:1,,FC Oliveira do Hospital,13,Fifth Round,5,TaÃ§a de Portugal Placard,3:0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
693,H,693,Header,Manchester United,21/22,3:2,CF,Norwich City,32,33,694,Premier League,2:0
694,H,694,Direct free kick,Manchester United,21/22,3:2,CF,Norwich City,76,33,695,Premier League,3:2
695,A,695,Left-footed shot,Manchester United,21/22,3:1,CF,Arsenal FC,34,34,696,Premier League,2:1
696,H,696,Right-footed shot,Manchester United,21/22,1:1,CF,Chelsea FC,62,37,697,Premier League,1:1


In [None]:
# Sorting by values: order the rows by the contents of the 'Team' column.
df.sort_values(by='Team')

Unnamed: 0.1,Unnamed: 0,Goal_no,Season,Competition,Matchday,Venue,Team,Opponent,Result,Position,Minute,At_score,Type_of_goal
611,611,612,19/20,Serie A,16,H,Juventus FC,Udinese Calcio,3:1,CF,37,2:0,Left-footed shot
647,647,648,20/21,UEFA Champions League,Group Stage,H,Juventus FC,Dynamo Kyiv,3:0,CF,57,2:0,Right-footed shot
646,646,647,20/21,UEFA Champions League,Group Stage,H,Juventus FC,Ferencvarosi TC,2:1,CF,35,1:1,Left-footed shot
645,645,646,20/21,Serie A,8,H,Juventus FC,Cagliari Calcio,2:0,CF,42,2:0,Right-footed shot
644,644,645,20/21,Serie A,8,H,Juventus FC,Cagliari Calcio,2:0,CF,38,1:0,Right-footed shot
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4,4,5,2/3,TaÃ§a de Portugal Placard,Fifth Round,H,Sporting CP,FC Oliveira do Hospital,8:1,,13,3:0,
3,3,4,2/3,TaÃ§a de Portugal Placard,Fourth Round,H,Sporting CP,CD Estarreja,4:1,,67,3:0,Left-footed shot
2,2,3,2/3,Liga Portugal,8,A,Sporting CP,Boavista FC,1:2,,88,1:2,Right-footed shot
1,1,2,2/3,Liga Portugal,6,H,Sporting CP,Moreirense FC,3:0,,90+5,3:0,Header


# Selection

# Getting
> Selecting a single column, which yields a Series, equivalent to `df.Team`:

In [None]:
# Select the single column 'Team'; the result is a Series.
df['Team']

0            Sporting CP
1            Sporting CP
2            Sporting CP
3            Sporting CP
4            Sporting CP
             ...        
693    Manchester United
694    Manchester United
695    Manchester United
696    Manchester United
697    Manchester United
Name: Team, Length: 698, dtype: object

> Selecting via [], which slices the rows:

In [None]:
# Slice rows with []: positions 0 up to (not including) 4, i.e. the first four rows.
df[0:4]

Unnamed: 0.1,Unnamed: 0,Goal_no,Season,Competition,Matchday,Venue,Team,Opponent,Result,Position,Minute,At_score,Type_of_goal
0,0,1,2/3,Liga Portugal,6,H,Sporting CP,Moreirense FC,3:0,LW,34,2:0,Solo run
1,1,2,2/3,Liga Portugal,6,H,Sporting CP,Moreirense FC,3:0,,90+5,3:0,Header
2,2,3,2/3,Liga Portugal,8,A,Sporting CP,Boavista FC,1:2,,88,1:2,Right-footed shot
3,3,4,2/3,TaÃ§a de Portugal Placard,Fourth Round,H,Sporting CP,CD Estarreja,4:1,,67,3:0,Left-footed shot


In [None]:
# The same row slicing works anywhere in the frame: rows 355 through 359.
df[355:360]

Unnamed: 0.1,Unnamed: 0,Goal_no,Season,Competition,Matchday,Venue,Team,Opponent,Result,Position,Minute,At_score,Type_of_goal
355,355,356,13/14,LaLiga,21,H,Real Madrid,Granada CF,2:0,LW,57,1:0,Right-footed shot
356,356,357,13/14,Copa del Rey,Semi-Finals,A,Real Madrid,Atlatico de Madrid,0:2,LW,6,0:1,Penalty
357,357,358,13/14,Copa del Rey,Semi-Finals,A,Real Madrid,Atlatico de Madrid,0:2,LW,15,0:2,Penalty
358,358,359,13/14,UEFA Champions League,last 16,A,Real Madrid,FC Schalke 4,1:6,LW,52,0:3,Left-footed shot
359,359,360,13/14,UEFA Champions League,last 16,A,Real Madrid,FC Schalke 4,1:6,LW,89,0:6,Right-footed shot


# Selection by label
> For getting a cross section using a label:

In [None]:
# Small 6x4 frame of standard-normal samples (index `dates`, columns A-D) used
# by the label- and position-based selection examples below.
# A seeded generator keeps the values identical on every run, so the outputs
# of the following cells stay consistent with this frame.
df1 = pd.DataFrame(
    np.random.default_rng(1).standard_normal((6, 4)),
    index=dates,
    columns=list('ABCD'),
)
df1

Unnamed: 0,A,B,C,D
2013-01-01,1.750147,-1.005009,2.396692,0.8233
2013-01-02,1.582759,0.899979,0.842661,1.589764
2013-01-03,-0.721107,0.465468,1.227007,-0.841115
2013-01-04,1.754927,-0.465076,-0.567563,-0.964918
2013-01-05,-0.004276,-2.629235,0.236795,1.401008
2013-01-06,0.109809,-1.038205,1.393569,-0.207766


In [None]:
# Cross section by label: the row whose index label is the first entry of
# `dates`, returned as a Series.
df1.loc[dates[0]]

A    1.907897
B    0.416196
C    0.160157
D    0.735555
Name: 2013-01-01 00:00:00, dtype: float64

> Selecting on a multi-axis by label:

In [None]:
# The first `:` selects all rows and ["A", "B"] selects columns A and B,
# so [:, ["A", "B"]] means every row of columns A and B.
df1.loc[:, ["A", "B"]]

Unnamed: 0,A,B
2013-01-01,1.907897,0.416196
2013-01-02,1.452866,0.301314
2013-01-03,-1.223501,-0.373056
2013-01-04,0.244675,-1.302855
2013-01-05,0.774055,0.875841
2013-01-06,0.970371,0.776456


> Showing label slicing, both endpoints are included:

In [None]:
# Label slice on the rows: both endpoints ('2013-01-02' and '2013-01-05') are
# included, restricted to columns A and B.
df1.loc['2013-01-02':'2013-01-05',['A','B']]

Unnamed: 0,A,B
2013-01-02,-0.200456,1.522654
2013-01-03,-0.518521,0.719187
2013-01-04,0.77857,1.003627
2013-01-05,0.421893,0.0925


> Reduction in the dimensions of the returned object: selecting a single row label changes the dimension of the result, so the selected columns become the rows (index) of the returned Series.

In [None]:
# A single row label plus a list of columns returns a Series (one dimension
# fewer than the frame), indexed by the selected column labels.
df1.loc['2013-01-02', ['A','B']]

A    1.582759
B    0.899979
Name: 2013-01-02 00:00:00, dtype: float64

> For getting a scalar value:

In [None]:
# A single row label and a single column label yield one scalar value.
df1.loc[dates[0], 'A']

1.7501473838168196

> For getting fast access to a scalar (equivalent to the prior method):

In [None]:
# .at gives fast label-based access to a single scalar; same value as the
# .loc lookup in the previous cell.
df1.at[dates[0], 'A']

1.7501473838168196

# Selection by position

> Select via the position of the passed integers:

In [None]:
# Select by integer position: position 3 is the fourth row (2013-01-04).
df1.iloc[3]

A    1.754927
B   -0.465076
C   -0.567563
D   -0.964918
Name: 2013-01-04 00:00:00, dtype: float64

> By integer slices, acting similar to NumPy/Python:

In [None]:
# Integer slices exclude the end position: rows 2-4 and columns 0-2 (A, B, C).
df1.iloc[2:5, 0:3]

Unnamed: 0,A,B,C
2013-01-03,-0.721107,0.465468,1.227007
2013-01-04,1.754927,-0.465076,-0.567563
2013-01-05,-0.004276,-2.629235,0.236795


> By lists of integer position locations, similar to the NumPy/Python style:

In [None]:
# A list of integer positions picks specific rows (1, 2 and 3); the column
# slice 0:2 keeps columns A and B.
df1.iloc[[1,2,3], 0:2]

Unnamed: 0,A,B
2013-01-02,1.582759,0.899979
2013-01-03,-0.721107,0.465468
2013-01-04,1.754927,-0.465076


> For slicing rows explicitly:

In [None]:
# Slice rows explicitly: rows at positions 1 and 2, all columns.
df1.iloc[1:3, :]

Unnamed: 0,A,B,C,D
2013-01-02,1.582759,0.899979,0.842661,1.589764
2013-01-03,-0.721107,0.465468,1.227007,-0.841115


> For slicing columns explicitly:

In [None]:
# Slice columns explicitly: all rows, columns at positions 1 and 2 (B and C).
df1.iloc[:, 1:3]

Unnamed: 0,B,C
2013-01-01,-1.005009,2.396692
2013-01-02,0.899979,0.842661
2013-01-03,0.465468,1.227007
2013-01-04,-0.465076,-0.567563
2013-01-05,-2.629235,0.236795
2013-01-06,-1.038205,1.393569


> For getting a value explicitly:

In [None]:
# A single row position and column position yield one scalar value
# (second row, second column).
df1.iloc[1, 1]

0.8999791578895616

> For getting fast access to a scalar (equivalent to the prior method):

In [None]:
# .iat gives fast position-based access to a single scalar; same value as the
# .iloc lookup in the previous cell.
df1.iat[1, 1]

0.8999791578895616

# Boolean indexing

> Using a single column’s values to select data:

In [None]:
# Boolean indexing: `df1['A'] > 0` builds a True/False mask per row, and
# indexing with it keeps only the rows where column A is positive.
df1[df1['A'] > 0]

Unnamed: 0,A,B,C,D
2013-01-01,1.750147,-1.005009,2.396692,0.8233
2013-01-02,1.582759,0.899979,0.842661,1.589764
2013-01-04,1.754927,-0.465076,-0.567563,-0.964918
2013-01-06,0.109809,-1.038205,1.393569,-0.207766
