TrainingByPackt
diff --git a/‎Chapter 1/Excercises/Excercise_1_creating_features_matrix_target_matrix.ipynb renamed to ‎Chapter 1/Excercises/Excercise_01_creating_features_matrix_target_matrix.ipynb b/‎Chapter 1/Excercises/Excercise_1_creating_features_matrix_target_matrix.ipynb renamed to ‎Chapter 1/Excercises/Excercise_01_creating_features_matrix_target_matrix.ipynb
diff --git a/‎Chapter 1/Excercises/Excercise_2_remove_missing_data.ipynb renamed to ‎Chapter 1/Excercises/Excercise_02_remove_missing_data.ipynb b/‎Chapter 1/Excercises/Excercise_2_remove_missing_data.ipynb renamed to ‎Chapter 1/Excercises/Excercise_02_remove_missing_data.ipynb
diff --git a/‎Chapter 1/Excercises/Excercise_3_Impute_missing_data.ipynb renamed to ‎Chapter 1/Excercises/Excercise_03_Impute_missing_data.ipynb b/‎Chapter 1/Excercises/Excercise_3_Impute_missing_data.ipynb renamed to ‎Chapter 1/Excercises/Excercise_03_Impute_missing_data.ipynb
diff --git a/‎Chapter 1/Excercises/Excercise_4_Find_Outlier_Using_IQR.ipynb renamed to ‎Chapter 1/Excercises/Excercise_04_Find_Outlier_Using_IQR.ipynb b/‎Chapter 1/Excercises/Excercise_4_Find_Outlier_Using_IQR.ipynb renamed to ‎Chapter 1/Excercises/Excercise_04_Find_Outlier_Using_IQR.ipynb
diff --git a/‎Chapter 1/Excercises/Excercise_5_Data _Integration_merge.ipynb renamed to ‎Chapter 1/Excercises/Excercise_05_Data _Integration_merge.ipynb b/‎Chapter 1/Excercises/Excercise_5_Data _Integration_merge.ipynb renamed to ‎Chapter 1/Excercises/Excercise_05_Data _Integration_merge.ipynb
diff --git a/‎Chapter 1/Excercises/Excercise_6_Replacement_Method_From_Categorical_to_Numeric.py.ipynb renamed to ‎Chapter 1/Excercises/Excercise_06_Replacement_Method_From_Categorical_to_Numeric.py.ipynb b/‎Chapter 1/Excercises/Excercise_6_Replacement_Method_From_Categorical_to_Numeric.py.ipynb renamed to ‎Chapter 1/Excercises/Excercise_06_Replacement_Method_From_Categorical_to_Numeric.py.ipynb
diff --git a/‎Chapter 1/Excercises/Excercise_7_Categorical_To_Numeric_Using_Label_Encoder.ipynb renamed to ‎Chapter 1/Excercises/Excercise_07_Categorical_To_Numeric_Using_Label_Encoder.ipynb b/‎Chapter 1/Excercises/Excercise_7_Categorical_To_Numeric_Using_Label_Encoder.ipynb renamed to ‎Chapter 1/Excercises/Excercise_07_Categorical_To_Numeric_Using_Label_Encoder.ipynb
diff --git a/‎Chapter 1/Excercises/Excercise_8_Categorical_To_Numeric_Using_OneHot_Encoder_and_pd.get_dummies.py.ipynb renamed to ‎Chapter 1/Excercises/Excercise_08_Categorical_To_Numeric_Using_OneHot_Encoder_and_pd.get_dummies.py.ipynb b/‎Chapter 1/Excercises/Excercise_8_Categorical_To_Numeric_Using_OneHot_Encoder_and_pd.get_dummies.py.ipynb renamed to ‎Chapter 1/Excercises/Excercise_08_Categorical_To_Numeric_Using_OneHot_Encoder_and_pd.get_dummies.py.ipynb
diff --git a/‎Chapter 1/Excercises/Excercise_9_standard_scaler.py.ipynb renamed to ‎Chapter 1/Excercises/Excercise_09_standard_scaler.py.ipynb b/‎Chapter 1/Excercises/Excercise_9_standard_scaler.py.ipynb renamed to ‎Chapter 1/Excercises/Excercise_09_standard_scaler.py.ipynb
diff --git a/‎Chapter 1/Excercises/Excercise_11_data_discretization.ipynb
Lines changed: 313 additions & 0 deletions b/‎Chapter 1/Excercises/Excercise_11_data_discretization.ipynb
Lines changed: 313 additions & 0 deletions
@@ -0,0 +1,313 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 1. Import the library and load the dataset into a pandas DataFrame"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "df = pd.read_csv('../Data/Student_bucketing.csv',header = 0)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 2. Display the first 5 rows of the DataFrame"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Student_id</th>\n",
+       "      <th>Age</th>\n",
+       "      <th>Grade</th>\n",
+       "      <th>Employed</th>\n",
+       "      <th>marks</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1</td>\n",
+       "      <td>19</td>\n",
+       "      <td>1st Class</td>\n",
+       "      <td>yes</td>\n",
+       "      <td>29</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>2</td>\n",
+       "      <td>20</td>\n",
+       "      <td>2nd Class</td>\n",
+       "      <td>no</td>\n",
+       "      <td>41</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>3</td>\n",
+       "      <td>18</td>\n",
+       "      <td>1st Class</td>\n",
+       "      <td>no</td>\n",
+       "      <td>57</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>4</td>\n",
+       "      <td>21</td>\n",
+       "      <td>2nd Class</td>\n",
+       "      <td>no</td>\n",
+       "      <td>29</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>5</td>\n",
+       "      <td>19</td>\n",
+       "      <td>1st Class</td>\n",
+       "      <td>no</td>\n",
+       "      <td>57</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   Student_id  Age      Grade Employed  marks\n",
+       "0           1   19  1st Class      yes     29\n",
+       "1           2   20  2nd Class       no     41\n",
+       "2           3   18  1st Class       no     57\n",
+       "3           4   21  2nd Class       no     29\n",
+       "4           5   19  1st Class       no     57"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 3. Perform bucketing using the `pd.cut()` function on the `marks` column and display the first 10 rows"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df['bucket'] = pd.cut(df['marks'],5,labels = ['Poor','Below_average','Average','Above_Average','Excellent'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Student_id</th>\n",
+       "      <th>Age</th>\n",
+       "      <th>Grade</th>\n",
+       "      <th>Employed</th>\n",
+       "      <th>marks</th>\n",
+       "      <th>bucket</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1</td>\n",
+       "      <td>19</td>\n",
+       "      <td>1st Class</td>\n",
+       "      <td>yes</td>\n",
+       "      <td>29</td>\n",
+       "      <td>Poor</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>2</td>\n",
+       "      <td>20</td>\n",
+       "      <td>2nd Class</td>\n",
+       "      <td>no</td>\n",
+       "      <td>41</td>\n",
+       "      <td>Below_average</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>3</td>\n",
+       "      <td>18</td>\n",
+       "      <td>1st Class</td>\n",
+       "      <td>no</td>\n",
+       "      <td>57</td>\n",
+       "      <td>Average</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>4</td>\n",
+       "      <td>21</td>\n",
+       "      <td>2nd Class</td>\n",
+       "      <td>no</td>\n",
+       "      <td>29</td>\n",
+       "      <td>Poor</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>5</td>\n",
+       "      <td>19</td>\n",
+       "      <td>1st Class</td>\n",
+       "      <td>no</td>\n",
+       "      <td>57</td>\n",
+       "      <td>Average</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>6</td>\n",
+       "      <td>20</td>\n",
+       "      <td>2nd Class</td>\n",
+       "      <td>yes</td>\n",
+       "      <td>53</td>\n",
+       "      <td>Average</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>7</td>\n",
+       "      <td>19</td>\n",
+       "      <td>3rd Class</td>\n",
+       "      <td>yes</td>\n",
+       "      <td>78</td>\n",
+       "      <td>Above_Average</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>8</td>\n",
+       "      <td>21</td>\n",
+       "      <td>3rd Class</td>\n",
+       "      <td>yes</td>\n",
+       "      <td>70</td>\n",
+       "      <td>Above_Average</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>9</td>\n",
+       "      <td>22</td>\n",
+       "      <td>3rd Class</td>\n",
+       "      <td>yes</td>\n",
+       "      <td>97</td>\n",
+       "      <td>Excellent</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>10</td>\n",
+       "      <td>21</td>\n",
+       "      <td>1st Class</td>\n",
+       "      <td>no</td>\n",
+       "      <td>58</td>\n",
+       "      <td>Average</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   Student_id  Age      Grade Employed  marks         bucket\n",
+       "0           1   19  1st Class      yes     29           Poor\n",
+       "1           2   20  2nd Class       no     41  Below_average\n",
+       "2           3   18  1st Class       no     57        Average\n",
+       "3           4   21  2nd Class       no     29           Poor\n",
+       "4           5   19  1st Class       no     57        Average\n",
+       "5           6   20  2nd Class      yes     53        Average\n",
+       "6           7   19  3rd Class      yes     78  Above_Average\n",
+       "7           8   21  3rd Class      yes     70  Above_Average\n",
+       "8           9   22  3rd Class      yes     97      Excellent\n",
+       "9          10   21  1st Class       no     58        Average"
+      ]
+     },
+     "execution_count": 32,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.head(10)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}