-
Notifications
You must be signed in to change notification settings - Fork 0
/
Form1.cs
345 lines (308 loc) · 11.3 KB
/
Form1.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using HtmlAgilityPack;
using System.Threading;
using System.Threading.Tasks;
using System.Net;
using System.IO;
using System.Text.RegularExpressions;
/* Saving for delegation method
this.Invoke((MethodInvoker)delegate
{
System.Diagnostics.Debug.Write("This program is expected to throw WebException on successful run." +
"\n\nException Message :" + e.Message);
});
Saving for streamwriter method
using (StreamWriter writer = new StreamWriter(@"C:\Users\Desktop\Desktop\LPProductURLs.txt"))
{
writer.WriteLine(fuskUrl);
}
*/
namespace LPSD_Migration
{
public partial class Form1 : Form
{
/// <summary>
/// Creates the form. The only work done here is the call to InitializeComponent,
/// which is not defined in this part of the partial class (presumably the
/// designer-generated half — confirm).
/// </summary>
public Form1()
{
InitializeComponent();
}
// Thread that runs the product crawl; null until the start button is first clicked.
private Thread workerThread = null;

/// <summary>
/// Starts the product crawl (<see cref="FuskSetup"/>) on a worker thread.
/// Clicks received while a crawl is still running are ignored.
/// </summary>
/// <param name="sender">Control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void startButton_Click(object sender, EventArgs e)
{
    // A second crawl running at the same time would fetch every page twice and
    // append to the same output files as the first one, so only one is allowed.
    if (this.workerThread != null && this.workerThread.IsAlive)
    {
        return;
    }

    this.workerThread = new Thread(new ThreadStart(this.FuskSetup));

    // Background thread: closing the form ends the process instead of leaving
    // an invisible crawl running until the whole ID range has been visited.
    this.workerThread.IsBackground = true;
    this.workerThread.Start();
}
/// <summary>
/// Downloads the image of every category ID listed in catIDs.txt, saves it as
/// main.jpg in a folder named after the ID, and appends one row per saved image
/// to catImport.txt.
/// </summary>
/// <param name="sender">Control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void pullCategoryButton_Click(object sender, EventArgs e)
{
    const string website = "http://www.lockhartphillipsusa.com/store/image.php?type=C&id=";
    const string categoriesRoot = @"C:\LPUSA Migration\categories\";

    string categoryIdData;
    using (StreamReader reader = new StreamReader(categoriesRoot + "catIDs.txt"))
    {
        categoryIdData = reader.ReadToEnd();
    }

    // One ID per line. Both CRLF and LF line endings are accepted, and blank
    // lines (e.g. a trailing newline) are dropped so they are not treated as IDs.
    string[] categoryIds = categoryIdData.Split(
        new[] { "\r\n", "\n" },
        StringSplitOptions.RemoveEmptyEntries);

    foreach (string rawId in categoryIds)
    {
        string category = rawId.Trim();
        if (category.Length == 0)
        {
            continue;
        }

        // DownloadImage reports every failure by returning null instead of throwing,
        // so a null check is the only failure handling needed for the download.
        using (Image image = DownloadImage(website + category))
        {
            if (image == null)
            {
                continue;
            }

            try
            {
                Directory.CreateDirectory(categoriesRoot + category);
                image.Save(categoriesRoot + category + @"\main.jpg");
            }
            catch (System.Runtime.InteropServices.ExternalException saveError)
            {
                // GDI+ could not encode/write the image; skip this category
                // rather than aborting the remaining ones.
                System.Diagnostics.Debug.Write(saveError.Message);
                continue;
            }
        }

        // Only categories whose image was actually saved get an import row, so the
        // import file never points at a file that does not exist.
        using (StreamWriter writer = new StreamWriter(categoriesRoot + "catImport.txt", true))
        {
            writer.WriteLine(category + ";/home/lockhart/public_html/images/migration/categories/" + category + "/main.jpg");
        }
    }
}
/// <summary>
/// Downloads an image over HTTP and decodes it.
/// </summary>
/// <param name="_URL">URL address to download the image from.</param>
/// <returns>
/// The decoded image, or <c>null</c> if the request or the decoding failed
/// (the exception is written to the console, never rethrown).
/// </returns>
public Image DownloadImage(string _URL)
{
    try
    {
        // Open a connection
        System.Net.HttpWebRequest request = (System.Net.HttpWebRequest)System.Net.HttpWebRequest.Create(_URL);
        request.AllowWriteStreamBuffering = true;

        // Optional headers: present the request as coming from a browser.
        request.UserAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)";
        request.Referer = "http://www.google.com/";

        // Timeout for obtaining the response: 60 seconds.
        request.Timeout = 60000;

        // Copy the payload into memory so the network resources can be released
        // deterministically, even when reading fails part-way through.
        MemoryStream buffer = new MemoryStream();
        using (System.Net.WebResponse response = request.GetResponse())
        using (System.IO.Stream responseStream = response.GetResponseStream())
        {
            responseStream.CopyTo(buffer);
        }
        buffer.Position = 0;

        // Image.FromStream needs its source stream to stay open for as long as the
        // image is used, so the image is built on the in-memory copy, which is
        // deliberately not disposed here.
        return Image.FromStream(buffer);
    }
    catch (Exception error)
    {
        Console.WriteLine("Exception caught in process: {0}", error.ToString());
        return null;
    }
}
/// <summary>
/// Builds one product-page URL per ID in the configured inclusive ID range and
/// passes the resulting list to <see cref="StartFusking"/>.
/// </summary>
public void FuskSetup()
{
    AgilityPackWebsite site = new AgilityPackWebsite
    {
        Website = "http://www.lockhartphillipsusa.com/store/product.php?productid="
    };

    // Integer product IDs to visit, both ends included.
    FuskMethodCriteriaProperties idRange = new FuskMethodCriteriaProperties
    {
        Start = 17135,
        End = 21000
    };

    // Base URL followed by the numeric ID, in ascending ID order.
    List<string> productUrls = Enumerable
        .Range(idRange.Start, idRange.End - idRange.Start + 1)
        .Select(productId => site.Website + productId.ToString())
        .ToList();

    StartFusking(productUrls);
}
/// <summary>
/// Downloads the HTML of every URL in the list and passes each page that could be
/// fetched to <see cref="StartParsingWithAgilityPack"/>.
/// </summary>
/// <param name="fuskList">URLs to visit, in order. A null list is treated as empty.</param>
public void StartFusking(List<string> fuskList)
{
    if (fuskList == null)
    {
        return;
    }

    // One client serves the whole crawl and is disposed once, after the last URL.
    using (WebClient client = new WebClient())
    {
        foreach (string fuskUrl in fuskList)
        {
            string htmlCode;
            try
            {
                htmlCode = client.DownloadString(fuskUrl);
            }
            catch (WebException)
            {
                // URLs that cannot be fetched are skipped silently and the crawl
                // moves on to the next one.
                continue;
            }

            // URL is good, parse it and pull out the data we want.
            StartParsingWithAgilityPack(htmlCode);
        }
    }
}
/// <summary>
/// Extracts the SKU, the main image URL and the detailed image URLs from a product
/// page and passes them to <see cref="ProcessTheProductDataAndDownloadImages"/>.
/// Pages without a SKU or without a main image are skipped.
/// </summary>
/// <param name="html">HTML source of the page. Null or empty input is ignored.</param>
public void StartParsingWithAgilityPack(string html)
{
    if (string.IsNullOrEmpty(html))
    {
        return;
    }

    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
    doc.LoadHtml(html);

    // SKU: text of the element with id "product_code" (the last one if several match).
    // SelectNodes yields null rather than an empty collection when nothing matches.
    string productSku = "";
    HtmlNodeCollection skuNodes = doc.DocumentNode.SelectNodes("//*[@id=\"product_code\"]");
    if (skuNodes != null)
    {
        foreach (HtmlNode skuNode in skuNodes)
        {
            productSku = skuNode.InnerText;
        }
    }

    // The SKU becomes a folder name, so surrounding whitespace is removed.
    productSku = (productSku ?? "").Trim();
    if (productSku.Length == 0)
    {
        System.Diagnostics.Debug.Write("Page skipped: no product_code element found.\n");
        return;
    }

    // Main image: src of the element with id "product_thumbnail" (last match wins).
    List<string> thumbnailUrls = CollectSrcAttributes(doc, "//*[@id=\"product_thumbnail\"]");
    if (thumbnailUrls.Count == 0)
    {
        System.Diagnostics.Debug.Write("Page skipped: no product_thumbnail image for SKU " + productSku + ".\n");
        return;
    }
    string mainImageUrl = thumbnailUrls[thumbnailUrls.Count - 1];

    // Detailed images; a product without any falls back to its main image.
    // Note: the src values may still contain the HTML entity "&amp;" for '&'.
    List<string> detailedImages = CollectSrcAttributes(doc, "//*[@class=\"detailed-images-other\"]");
    if (detailedImages.Count == 0)
    {
        detailedImages.AddRange(thumbnailUrls);
    }

    // Everything is parsed, send it to post processing.
    ProcessTheProductDataAndDownloadImages(productSku, mainImageUrl, detailedImages);
}

// Returns the "src" attribute value of every node matched by xpath, in document
// order; nodes without a "src" attribute are skipped. Never returns null.
private static List<string> CollectSrcAttributes(HtmlAgilityPack.HtmlDocument doc, string xpath)
{
    List<string> sources = new List<string>();
    HtmlNodeCollection matches = doc.DocumentNode.SelectNodes(xpath);
    if (matches == null)
    {
        return sources;
    }

    foreach (HtmlNode match in matches)
    {
        HtmlAttribute src = match.Attributes["src"];
        if (src != null)
        {
            sources.Add(src.Value);
        }
    }
    return sources;
}
/// <summary>
/// Creates the product's image folder, downloads its main and detailed images into
/// it, and appends the matching rows to main_image.txt and detailed.txt.
/// A row is written for every image attempted, whether or not the download succeeded.
/// </summary>
/// <param name="sku">Product SKU; used as folder name and in the rows written.</param>
/// <param name="mainImageUrl">URL of the main image; "&amp;amp;" is decoded to "&amp;".</param>
/// <param name="detailedImages">
/// URLs of the detailed images. The list is not modified; null is treated as empty.
/// </param>
public void ProcessTheProductDataAndDownloadImages(string sku, string mainImageUrl, List<string> detailedImages)
{
    string productDir = @"C:\LPUSA Migration\" + sku;

    // Create image dir
    try
    {
        System.IO.Directory.CreateDirectory(productDir);
    }
    catch (System.IO.IOException e)
    {
        System.Diagnostics.Debug.Write(e.Message);
    }

    // One client serves every download of this product and is disposed once.
    using (WebClient client = new WebClient())
    {
        // Download main image
        TryDownloadFile(client, DecodeAmpersands(mainImageUrl), productDir + @"\main.jpg");
        AppendImportLine(
            @"C:\LPUSA Migration\main_image.txt",
            string.Format("{0};images/P/{0}/main.jpg", sku));

        if (detailedImages == null)
        {
            return;
        }

        // Download detailed images; each file is named after its index in the list.
        for (int i = 0; i < detailedImages.Count; i++)
        {
            string imageUrl = DecodeAmpersands(detailedImages[i]);

            // Remove broken image links
            if (imageUrl == "/store/default_image.gif")
            {
                continue;
            }

            TryDownloadFile(client, imageUrl, productDir + @"\" + i + ".jpg");

            // PATH is temp, will do find and replace on this
            AppendImportLine(
                @"C:\LPUSA Migration\detailed.txt",
                string.Format("{0};images/D/{0}/{1}.jpg", sku, i));
        }
    }
}

// Turns the HTML entity "&amp;" found in scraped attribute values back into '&'.
// Null stays null.
private static string DecodeAmpersands(string url)
{
    return url == null ? null : url.Replace("&amp;", "&");
}

// Downloads url to targetPath. Empty URLs and WebExceptions are logged to the debug
// output and reported as false so the caller can carry on with the next image.
private static bool TryDownloadFile(WebClient client, string url, string targetPath)
{
    if (string.IsNullOrEmpty(url))
    {
        System.Diagnostics.Debug.Write("No image URL for " + targetPath);
        return false;
    }

    try
    {
        client.DownloadFile(url, targetPath);
        return true;
    }
    catch (WebException e)
    {
        System.Diagnostics.Debug.Write(e.Message);
        return false;
    }
}

// Appends a single line to the text file at path, creating the file if needed.
private static void AppendImportLine(string path, string line)
{
    using (StreamWriter writer = new StreamWriter(path, true))
    {
        writer.WriteLine(line);
    }
}
}
/// <summary>
/// Holds the address of the website to crawl.
/// </summary>
class AgilityPackWebsite
{
    /// <summary>
    /// Gets or sets the website address; <c>null</c> until a value is assigned.
    /// </summary>
    public string Website { get; set; }
}
/// <summary>
/// Holds the integer range used as the fusking criteria.
/// </summary>
class FuskMethodCriteriaProperties
{
    /// <summary>
    /// Gets or sets the start value of the range; 0 until assigned.
    /// </summary>
    public int Start { get; set; }

    /// <summary>
    /// Gets or sets the end value of the range; 0 until assigned.
    /// </summary>
    public int End { get; set; }
}
}