Skip to content

Commit 00b88de

Browse files
committed
Scaling Exercise
1 parent d4c5f37 commit 00b88de

File tree

2 files changed

+569
-0
lines changed

2 files changed

+569
-0
lines changed
Lines changed: 290 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,290 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"### 1.\tImport the necessary libraries and load the dataset into a pandas DataFrame"
8+
]
9+
},
10+
{
11+
"cell_type": "code",
12+
"execution_count": 1,
13+
"metadata": {},
14+
"outputs": [
15+
{
16+
"name": "stderr",
17+
"output_type": "stream",
18+
"text": [
19+
"/anaconda3/lib/python3.6/importlib/_bootstrap.py:219: RuntimeWarning: numpy.ufunc size changed, may indicate binary incompatibility. Expected 216, got 192\n",
20+
" return f(*args, **kwds)\n",
21+
"/anaconda3/lib/python3.6/importlib/_bootstrap.py:219: RuntimeWarning: numpy.ufunc size changed, may indicate binary incompatibility. Expected 216, got 192\n",
22+
" return f(*args, **kwds)\n"
23+
]
24+
}
25+
],
26+
"source": [
27+
"import pandas as pd\n",
28+
"from sklearn.preprocessing import MinMaxScaler\n",
29+
"df = pd.read_csv(\"../Data/Wholesale customers data.csv\")"
30+
]
31+
},
32+
{
33+
"cell_type": "markdown",
34+
"metadata": {},
35+
"source": [
36+
"### 2.\tCheck whether there is any missing data; if so, drop the missing data."
37+
]
38+
},
39+
{
40+
"cell_type": "code",
41+
"execution_count": 2,
42+
"metadata": {},
43+
"outputs": [
44+
{
45+
"data": {
46+
"text/html": [
47+
"<div>\n",
48+
"<style scoped>\n",
49+
" .dataframe tbody tr th:only-of-type {\n",
50+
" vertical-align: middle;\n",
51+
" }\n",
52+
"\n",
53+
" .dataframe tbody tr th {\n",
54+
" vertical-align: top;\n",
55+
" }\n",
56+
"\n",
57+
" .dataframe thead th {\n",
58+
" text-align: right;\n",
59+
" }\n",
60+
"</style>\n",
61+
"<table border=\"1\" class=\"dataframe\">\n",
62+
" <thead>\n",
63+
" <tr style=\"text-align: right;\">\n",
64+
" <th></th>\n",
65+
" <th>Null</th>\n",
66+
" <th>type</th>\n",
67+
" </tr>\n",
68+
" </thead>\n",
69+
" <tbody>\n",
70+
" <tr>\n",
71+
" <th>Channel</th>\n",
72+
" <td>False</td>\n",
73+
" <td>int64</td>\n",
74+
" </tr>\n",
75+
" <tr>\n",
76+
" <th>Region</th>\n",
77+
" <td>False</td>\n",
78+
" <td>int64</td>\n",
79+
" </tr>\n",
80+
" <tr>\n",
81+
" <th>Fresh</th>\n",
82+
" <td>False</td>\n",
83+
" <td>int64</td>\n",
84+
" </tr>\n",
85+
" <tr>\n",
86+
" <th>Milk</th>\n",
87+
" <td>False</td>\n",
88+
" <td>int64</td>\n",
89+
" </tr>\n",
90+
" <tr>\n",
91+
" <th>Grocery</th>\n",
92+
" <td>False</td>\n",
93+
" <td>int64</td>\n",
94+
" </tr>\n",
95+
" <tr>\n",
96+
" <th>Frozen</th>\n",
97+
" <td>False</td>\n",
98+
" <td>int64</td>\n",
99+
" </tr>\n",
100+
" <tr>\n",
101+
" <th>Detergents_Paper</th>\n",
102+
" <td>False</td>\n",
103+
" <td>int64</td>\n",
104+
" </tr>\n",
105+
" <tr>\n",
106+
" <th>Delicassen</th>\n",
107+
" <td>False</td>\n",
108+
" <td>int64</td>\n",
109+
" </tr>\n",
110+
" </tbody>\n",
111+
"</table>\n",
112+
"</div>"
113+
],
114+
"text/plain": [
115+
" Null type\n",
116+
"Channel False int64\n",
117+
"Region False int64\n",
118+
"Fresh False int64\n",
119+
"Milk False int64\n",
120+
"Grocery False int64\n",
121+
"Frozen False int64\n",
122+
"Detergents_Paper False int64\n",
123+
"Delicassen False int64"
124+
]
125+
},
126+
"execution_count": 2,
127+
"metadata": {},
128+
"output_type": "execute_result"
129+
}
130+
],
131+
"source": [
132+
"null_ = df.isna().any()\n",
133+
"dtypes = df.dtypes\n",
134+
"info = pd.concat([null_,dtypes],axis = 1,keys = ['Null','type'])\n",
135+
"info"
136+
]
137+
},
138+
{
139+
"cell_type": "markdown",
140+
"metadata": {},
141+
"source": [
142+
"### 3.\tPerform normalization scaling. To do so, use the MinMaxScaler() class from sklearn.preprocessing and call its fit_transform() method."
143+
]
144+
},
145+
{
146+
"cell_type": "code",
147+
"execution_count": 3,
148+
"metadata": {},
149+
"outputs": [
150+
{
151+
"data": {
152+
"text/html": [
153+
"<div>\n",
154+
"<style scoped>\n",
155+
" .dataframe tbody tr th:only-of-type {\n",
156+
" vertical-align: middle;\n",
157+
" }\n",
158+
"\n",
159+
" .dataframe tbody tr th {\n",
160+
" vertical-align: top;\n",
161+
" }\n",
162+
"\n",
163+
" .dataframe thead th {\n",
164+
" text-align: right;\n",
165+
" }\n",
166+
"</style>\n",
167+
"<table border=\"1\" class=\"dataframe\">\n",
168+
" <thead>\n",
169+
" <tr style=\"text-align: right;\">\n",
170+
" <th></th>\n",
171+
" <th>Channel</th>\n",
172+
" <th>Region</th>\n",
173+
" <th>Fresh</th>\n",
174+
" <th>Milk</th>\n",
175+
" <th>Grocery</th>\n",
176+
" <th>Frozen</th>\n",
177+
" <th>Detergents_Paper</th>\n",
178+
" <th>Delicassen</th>\n",
179+
" </tr>\n",
180+
" </thead>\n",
181+
" <tbody>\n",
182+
" <tr>\n",
183+
" <th>0</th>\n",
184+
" <td>1.0</td>\n",
185+
" <td>1.0</td>\n",
186+
" <td>0.112940</td>\n",
187+
" <td>0.130727</td>\n",
188+
" <td>0.081464</td>\n",
189+
" <td>0.003106</td>\n",
190+
" <td>0.065427</td>\n",
191+
" <td>0.027847</td>\n",
192+
" </tr>\n",
193+
" <tr>\n",
194+
" <th>1</th>\n",
195+
" <td>1.0</td>\n",
196+
" <td>1.0</td>\n",
197+
" <td>0.062899</td>\n",
198+
" <td>0.132824</td>\n",
199+
" <td>0.103097</td>\n",
200+
" <td>0.028548</td>\n",
201+
" <td>0.080590</td>\n",
202+
" <td>0.036984</td>\n",
203+
" </tr>\n",
204+
" <tr>\n",
205+
" <th>2</th>\n",
206+
" <td>1.0</td>\n",
207+
" <td>1.0</td>\n",
208+
" <td>0.056622</td>\n",
209+
" <td>0.119181</td>\n",
210+
" <td>0.082790</td>\n",
211+
" <td>0.039116</td>\n",
212+
" <td>0.086052</td>\n",
213+
" <td>0.163559</td>\n",
214+
" </tr>\n",
215+
" <tr>\n",
216+
" <th>3</th>\n",
217+
" <td>0.0</td>\n",
218+
" <td>1.0</td>\n",
219+
" <td>0.118254</td>\n",
220+
" <td>0.015536</td>\n",
221+
" <td>0.045464</td>\n",
222+
" <td>0.104842</td>\n",
223+
" <td>0.012346</td>\n",
224+
" <td>0.037234</td>\n",
225+
" </tr>\n",
226+
" <tr>\n",
227+
" <th>4</th>\n",
228+
" <td>1.0</td>\n",
229+
" <td>1.0</td>\n",
230+
" <td>0.201626</td>\n",
231+
" <td>0.072914</td>\n",
232+
" <td>0.077552</td>\n",
233+
" <td>0.063934</td>\n",
234+
" <td>0.043455</td>\n",
235+
" <td>0.108093</td>\n",
236+
" </tr>\n",
237+
" </tbody>\n",
238+
"</table>\n",
239+
"</div>"
240+
],
241+
"text/plain": [
242+
" Channel Region Fresh Milk Grocery Frozen Detergents_Paper \\\n",
243+
"0 1.0 1.0 0.112940 0.130727 0.081464 0.003106 0.065427 \n",
244+
"1 1.0 1.0 0.062899 0.132824 0.103097 0.028548 0.080590 \n",
245+
"2 1.0 1.0 0.056622 0.119181 0.082790 0.039116 0.086052 \n",
246+
"3 0.0 1.0 0.118254 0.015536 0.045464 0.104842 0.012346 \n",
247+
"4 1.0 1.0 0.201626 0.072914 0.077552 0.063934 0.043455 \n",
248+
"\n",
249+
" Delicassen \n",
250+
"0 0.027847 \n",
251+
"1 0.036984 \n",
252+
"2 0.163559 \n",
253+
"3 0.037234 \n",
254+
"4 0.108093 "
255+
]
256+
},
257+
"execution_count": 3,
258+
"metadata": {},
259+
"output_type": "execute_result"
260+
}
261+
],
262+
"source": [
263+
"norm_scale = MinMaxScaler().fit_transform(df)\n",
264+
"scaled_frame = pd.DataFrame(norm_scale,columns=df.columns)\n",
265+
"scaled_frame.head()"
266+
]
267+
}
268+
],
269+
"metadata": {
270+
"kernelspec": {
271+
"display_name": "Python 3",
272+
"language": "python",
273+
"name": "python3"
274+
},
275+
"language_info": {
276+
"codemirror_mode": {
277+
"name": "ipython",
278+
"version": 3
279+
},
280+
"file_extension": ".py",
281+
"mimetype": "text/x-python",
282+
"name": "python",
283+
"nbconvert_exporter": "python",
284+
"pygments_lexer": "ipython3",
285+
"version": "3.6.4"
286+
}
287+
},
288+
"nbformat": 4,
289+
"nbformat_minor": 2
290+
}

0 commit comments

Comments
 (0)