<a href="https://colab.research.google.com/github/MiladQolami/Mathematical-tools-for-neuroscience/blob/main/Machine_Learning/IntroToLibraries.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Introduction

Pandas is a popular open-source library in Python for data manipulation and analysis. It provides powerful data structures and data analysis tools that make working with structured data more efficient and intuitive.

In [89]:
# Import libraries
import numpy as np
import pandas as pd
import sklearn as sk
import tensorflow as tf
import matplotlib.pyplot as plt

## Pandas Data Structures

The two most important data structures in Pandas are the Series (a one-dimensional labelled array) and the DataFrame (a two-dimensional labelled table).

### Series


In [90]:
# Build a Series from a plain Python list; no index is given, so pandas
# supplies the default 0..n-1 integer labels.
values = [1, 3, 4, 2, 6, 4, 4]
series1 = pd.Series(data=values)
series1


0    1
1    3
2    4
3    2
4    6
5    4
6    4
dtype: int64

### DataFrames


In [91]:
# Column-oriented input: every key becomes a column label and every list
# holds that column's values (one entry per student).
observation = dict(
    Name=['Alice', 'Bob', 'Charlie'],
    Math=[80, 90, 75],
    Science=[85, 92, 88],
    English=[70, 80, 78],
)
dataframe1 = pd.DataFrame(observation)
dataframe1

Unnamed: 0,Name,Math,Science,English
0,Alice,80,85,70
1,Bob,90,92,80
2,Charlie,75,88,78


## Reshaping DataFrames


### Melting a DataFrame


In [92]:

# Wide -> long: keep 'Name' as the identifier and stack the three subject
# columns into a ('Subject', 'Score') pair per row.
subject_columns = ['Math', 'Science', 'English']
melted_df = dataframe1.melt(
    id_vars='Name',
    value_vars=subject_columns,
    var_name='Subject',
    value_name='Score',
)
melted_df


Unnamed: 0,Name,Subject,Score
0,Alice,Math,80
1,Bob,Math,90
2,Charlie,Math,75
3,Alice,Science,85
4,Bob,Science,92
5,Charlie,Science,88
6,Alice,English,70
7,Bob,English,80
8,Charlie,English,78


### Pivoting a DataFrame


In [93]:
# Long -> wide: one row per 'Name', one column per 'Subject', cells filled
# with the matching 'Score' (the inverse of the melt above).
melted_df.pivot(index='Name', columns='Subject', values='Score')


Subject,English,Math,Science
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Alice,70,80,85
Bob,80,90,92
Charlie,78,75,88


## Subsetting DataFrames


### Loading a bigger dataset


In [94]:
from sklearn import datasets

# Load scikit-learn's bundled breast-cancer dataset and wrap its feature
# matrix in a DataFrame whose columns are labelled with the feature names.
data = datasets.load_breast_cancer()
breast_cancer = pd.DataFrame(data=data['data'], columns=data['feature_names'])


### Checking columns of the dataset


In [95]:
# The column labels are exposed as a pandas Index; here they are the feature
# names that were passed as `columns=` when the DataFrame was built.
breast_cancer.columns


Index(['mean radius', 'mean texture', 'mean perimeter', 'mean area',
       'mean smoothness', 'mean compactness', 'mean concavity',
       'mean concave points', 'mean symmetry', 'mean fractal dimension',
       'radius error', 'texture error', 'perimeter error', 'area error',
       'smoothness error', 'compactness error', 'concavity error',
       'concave points error', 'symmetry error', 'fractal dimension error',
       'worst radius', 'worst texture', 'worst perimeter', 'worst area',
       'worst smoothness', 'worst compactness', 'worst concavity',
       'worst concave points', 'worst symmetry', 'worst fractal dimension'],
      dtype='object')

### Viewing the first five rows


In [96]:
# head() returns the first n rows; n=5 is the default, spelled out here.
breast_cancer.head(n=5)


Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


### Randomly sampling some rows


In [97]:
# Draw five rows at random. The seed is fixed so that re-running the notebook
# (Restart & Run All) selects the same rows every time; change or remove
# random_state to get a different draw.
breast_cancer.sample(n=5, random_state=42)


Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
25,17.14,16.4,116.0,912.7,0.1186,0.2276,0.2229,0.1401,0.304,0.07413,...,22.25,21.4,152.4,1461.0,0.1545,0.3949,0.3853,0.255,0.4066,0.1059
314,8.597,18.6,54.09,221.2,0.1074,0.05847,0.0,0.0,0.2163,0.07359,...,8.952,22.44,56.65,240.1,0.1347,0.07767,0.0,0.0,0.3142,0.08116
60,10.17,14.88,64.55,311.9,0.1134,0.08061,0.01084,0.0129,0.2743,0.0696,...,11.02,17.45,69.86,368.6,0.1275,0.09866,0.02168,0.02579,0.3557,0.0802
296,10.91,12.35,69.14,363.7,0.08518,0.04721,0.01236,0.01369,0.1449,0.06031,...,11.37,14.82,72.42,392.2,0.09312,0.07506,0.02884,0.03194,0.2143,0.06643
365,20.44,21.78,133.8,1293.0,0.0915,0.1131,0.09799,0.07785,0.1618,0.05557,...,24.31,26.37,161.2,1780.0,0.1327,0.2376,0.2702,0.1765,0.2609,0.06735


### Selecting n rows with largest (or smallest) values


In [98]:
# The five rows with the greatest 'mean perimeter', largest first;
# nsmallest() is the counterpart for the smallest values.
breast_cancer.nlargest(5, 'mean perimeter')


Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
212,28.11,18.47,188.5,2499.0,0.1142,0.1516,0.3201,0.1595,0.1648,0.05525,...,28.11,18.47,188.5,2499.0,0.1142,0.1516,0.3201,0.1595,0.1648,0.05525
461,27.42,26.27,186.9,2501.0,0.1084,0.1988,0.3635,0.1689,0.2061,0.05623,...,36.04,31.37,251.2,4254.0,0.1357,0.4256,0.6833,0.2625,0.2641,0.07427
180,27.22,21.87,182.1,2250.0,0.1094,0.1914,0.2871,0.1878,0.18,0.0577,...,33.12,32.85,220.8,3216.0,0.1472,0.4034,0.534,0.2688,0.2856,0.08082
352,25.73,17.46,174.2,2010.0,0.1149,0.2363,0.3368,0.1913,0.1956,0.06121,...,33.13,23.58,229.3,3234.0,0.153,0.5937,0.6451,0.2756,0.369,0.08815
82,25.22,24.91,171.5,1878.0,0.1063,0.2665,0.3339,0.1845,0.1829,0.06782,...,30.0,33.62,211.7,2562.0,0.1573,0.6076,0.6476,0.2867,0.2355,0.1051


### Logical indexing of rows


In [99]:
# Build a boolean mask (True where 'mean radius' exceeds 'mean texture'),
# then keep only the rows where the mask is True.
radius_exceeds_texture = breast_cancer['mean radius'].gt(breast_cancer['mean texture'])
breast_cancer[radius_exceeds_texture]


Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
0,17.99,10.38,122.80,1001.0,0.11840,0.27760,0.30010,0.14710,0.2419,0.07871,...,25.38,17.33,184.60,2019.0,0.1622,0.6656,0.7119,0.26540,0.4601,0.11890
1,20.57,17.77,132.90,1326.0,0.08474,0.07864,0.08690,0.07017,0.1812,0.05667,...,24.99,23.41,158.80,1956.0,0.1238,0.1866,0.2416,0.18600,0.2750,0.08902
4,20.29,14.34,135.10,1297.0,0.10030,0.13280,0.19800,0.10430,0.1809,0.05883,...,22.54,16.67,152.20,1575.0,0.1374,0.2050,0.4000,0.16250,0.2364,0.07678
22,15.34,14.26,102.50,704.4,0.10730,0.21350,0.20770,0.09756,0.2521,0.07032,...,18.07,19.08,125.10,980.9,0.1390,0.5954,0.6305,0.23930,0.4667,0.09946
25,17.14,16.40,116.00,912.7,0.11860,0.22760,0.22290,0.14010,0.3040,0.07413,...,22.25,21.40,152.40,1461.0,0.1545,0.3949,0.3853,0.25500,0.4066,0.10590
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
511,14.81,14.70,94.66,680.7,0.08472,0.05016,0.03416,0.02541,0.1659,0.05348,...,15.61,17.58,101.70,760.2,0.1139,0.1011,0.1101,0.07955,0.2334,0.06142
513,14.58,13.66,94.29,658.8,0.09832,0.08918,0.08222,0.04349,0.1739,0.05640,...,16.76,17.24,108.50,862.0,0.1223,0.1928,0.2492,0.09186,0.2626,0.07048
521,24.63,21.60,165.50,1841.0,0.10300,0.21060,0.23100,0.14710,0.1991,0.06739,...,29.92,26.93,205.70,2642.0,0.1342,0.4188,0.4658,0.24750,0.3157,0.09671
527,12.34,12.27,78.94,468.5,0.09003,0.06307,0.02958,0.02647,0.1689,0.05808,...,13.61,19.27,87.22,564.9,0.1292,0.2074,0.1791,0.10700,0.3110,0.07592


### Selecting single or multiple columns


In [100]:

# Three ways to select columns:
#   1. a single label            -> one column, returned as a Series
#   2. a list of labels          -> a DataFrame with just those columns
#   3. filter(regex=...)         -> the columns whose name matches the pattern
# A cell only renders its last expression automatically, so the first two
# selections are passed to display() (provided by the IPython kernel);
# otherwise they would be computed and silently discarded.
display(breast_cancer['mean area'])
display(breast_cancer[['mean area', 'worst texture']])
breast_cancer.filter(regex='mean ')

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension
0,17.99,10.38,122.80,1001.0,0.11840,0.27760,0.30010,0.14710,0.2419,0.07871
1,20.57,17.77,132.90,1326.0,0.08474,0.07864,0.08690,0.07017,0.1812,0.05667
2,19.69,21.25,130.00,1203.0,0.10960,0.15990,0.19740,0.12790,0.2069,0.05999
3,11.42,20.38,77.58,386.1,0.14250,0.28390,0.24140,0.10520,0.2597,0.09744
4,20.29,14.34,135.10,1297.0,0.10030,0.13280,0.19800,0.10430,0.1809,0.05883
...,...,...,...,...,...,...,...,...,...,...
564,21.56,22.39,142.00,1479.0,0.11100,0.11590,0.24390,0.13890,0.1726,0.05623
565,20.13,28.25,131.20,1261.0,0.09780,0.10340,0.14400,0.09791,0.1752,0.05533
566,16.60,28.08,108.30,858.1,0.08455,0.10230,0.09251,0.05302,0.1590,0.05648
567,20.60,29.33,140.10,1265.0,0.11780,0.27700,0.35140,0.15200,0.2397,0.07016


### Subsetting rows and columns based on integer location


In [101]:
# iloc selects by integer position, and the stop of each slice is excluded.
# A cell only renders its last expression automatically, so the first
# selection is passed to display() (provided by the IPython kernel);
# otherwise its result would be silently discarded.
display(breast_cancer.iloc[12:15])       # rows at positions 12, 13, 14 (all columns)
breast_cancer.iloc[20:30, 5:10]          # rows 20-29, columns at positions 5-9


Unnamed: 0,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension
20,0.127,0.04568,0.0311,0.1967,0.06811
21,0.06492,0.02956,0.02076,0.1815,0.06905
22,0.2135,0.2077,0.09756,0.2521,0.07032
23,0.1022,0.1097,0.08632,0.1769,0.05278
24,0.1457,0.1525,0.0917,0.1995,0.0633
25,0.2276,0.2229,0.1401,0.304,0.07413
26,0.1868,0.1425,0.08783,0.2252,0.06924
27,0.1066,0.149,0.07731,0.1697,0.05699
28,0.1697,0.1683,0.08751,0.1926,0.0654
29,0.1157,0.09875,0.07953,0.1739,0.06149


### Indexing and selecting data based on labels of rows and columns (label-based; slice endpoints are inclusive)


In [102]:
# loc selects by label: the row slice covers index labels 0 through 10
# (both endpoints are included, unlike iloc), restricted to two named columns.
selected_columns = ['mean area', 'mean texture']
breast_cancer.loc[slice(0, 10), selected_columns]


Unnamed: 0,mean area,mean texture
0,1001.0,10.38
1,1326.0,17.77
2,1203.0,21.25
3,386.1,20.38
4,1297.0,14.34
5,477.1,15.7
6,1040.0,19.98
7,577.9,20.83
8,519.8,21.82
9,475.9,24.04


## Data Summaries


### Summary statistics


In [103]:
# describe() produces a table of summary statistics per column; median()
# returns the per-column median as a Series.
# A cell only renders its last expression automatically, so the describe()
# table is passed to display() (provided by the IPython kernel);
# otherwise it would be computed and silently discarded.
display(breast_cancer.describe())
breast_cancer.median()

mean radius                 13.370000
mean texture                18.840000
mean perimeter              86.240000
mean area                  551.100000
mean smoothness              0.095870
mean compactness             0.092630
mean concavity               0.061540
mean concave points          0.033500
mean symmetry                0.179200
mean fractal dimension       0.061540
radius error                 0.324200
texture error                1.108000
perimeter error              2.287000
area error                  24.530000
smoothness error             0.006380
compactness error            0.020450
concavity error              0.025890
concave points error         0.010930
symmetry error               0.018730
fractal dimension error      0.003187
worst radius                14.970000
worst texture               25.410000
worst perimeter             97.660000
worst area                 686.500000
worst smoothness             0.131300
worst compactness            0.211900
worst concav

### Shape of the DataFrame


In [104]:
# shape is a (number of rows, number of columns) tuple.
breast_cancer.shape

(569, 30)

### Applying a function to the DataFrame


In [105]:
# Apply NumPy's square root to the frame column by column (axis=0 is the
# default, spelled out here); the result has the same shape and labels.
breast_cancer.apply(np.sqrt, axis=0)

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
0,4.241462,3.221801,11.081516,31.638584,0.344093,0.526878,0.547814,0.383536,0.491833,0.280553,...,5.037857,4.162932,13.586758,44.933284,0.402741,0.815843,0.843742,0.515170,0.678307,0.344819
1,4.535416,4.215448,11.528226,36.414283,0.291101,0.280428,0.294788,0.264896,0.425676,0.238055,...,4.999000,4.838388,12.601587,44.226689,0.351852,0.431972,0.491528,0.431277,0.524404,0.298362
2,4.437342,4.609772,11.401754,34.684290,0.331059,0.399875,0.444297,0.357631,0.454863,0.244929,...,4.854894,5.052722,12.349089,41.340053,0.380000,0.651537,0.671118,0.492950,0.601082,0.295939
3,3.379349,4.514421,8.807951,19.649427,0.377492,0.532823,0.491325,0.324345,0.509608,0.312154,...,3.861347,5.147815,9.943339,23.826456,0.458039,0.930752,0.828794,0.507445,0.814739,0.415933
4,4.504442,3.786819,11.623253,36.013886,0.316702,0.364417,0.444972,0.322955,0.425323,0.242549,...,4.747631,4.082891,12.336936,39.686270,0.370675,0.452769,0.632456,0.403113,0.486210,0.277092
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
564,4.643275,4.731807,11.916375,38.457769,0.333167,0.340441,0.493862,0.372693,0.415452,0.237129,...,5.044799,5.138093,12.887979,45.022217,0.375500,0.459674,0.640859,0.470744,0.453872,0.266740
565,4.486647,5.315073,11.454257,35.510562,0.312730,0.321559,0.379473,0.312906,0.418569,0.235223,...,4.867237,6.184658,12.449900,41.605288,0.341467,0.438406,0.567010,0.403485,0.507149,0.257624
566,4.074310,5.299057,10.406729,29.293344,0.290775,0.319844,0.304155,0.230261,0.398748,0.237655,...,4.356604,5.841233,11.256109,33.526109,0.337491,0.556237,0.583352,0.376563,0.470956,0.279643
567,4.538722,5.415718,11.836385,35.566838,0.343220,0.526308,0.592790,0.389872,0.489592,0.264877,...,5.073460,6.278535,13.586758,42.673177,0.406202,0.931719,0.968865,0.514782,0.639296,0.352136



## Grouping Data
