/
Coffee_Gallery.py
97 lines (90 loc) · 4.34 KB
/
Coffee_Gallery.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import os
import csv
import re
from glob import glob
import requests
# Welcome to the Discord data package extractor.
# --- Proudly made by Ech0 ---
# Matches attachment URLs hosted on Discord's CDN. The match stops at
# whitespace, quotes and angle brackets so that several links stored in one
# CSV cell (space separated) are captured as separate URLs.
CDN_URL_RE = re.compile(r"https://cdn\.discordapp\.com/[^\s<>\"']+")
DEFAULT_EXTENSION = ".png"  # used when the URL carries no usable extension
REQUEST_TIMEOUT = 30  # seconds before a stalled download is abandoned
SEPARATOR = "----------------------------------------------"


def ensure_trailing_slash(path):
    """Return *path*, appending "/" when it does not already end with one."""
    return path if path.endswith("/") else path + "/"


def find_csv_files(root):
    """Return the paths of all ``.csv`` files under *root*, recursively.

    Directories and file names are visited in sorted order so that the
    numbering of the downloaded files is reproducible between runs.
    """
    found = []
    for folder, subdirs, names in os.walk(root):
        subdirs.sort()  # in-place sort steers the order os.walk descends in
        found.extend(
            os.path.join(folder, name)
            for name in sorted(names)
            if name.lower().endswith(".csv")
        )
    return found


def extract_cdn_links(rows):
    """Return every Discord CDN URL found in *rows*, in order of appearance.

    *rows* is an iterable of rows, each row being an iterable of strings (as
    produced by ``csv.reader``). Duplicates are kept; every cell is searched,
    so links inside the message text are found as well as attachment links.
    """
    links = []
    for row in rows:
        for cell in row:
            links.extend(CDN_URL_RE.findall(cell))
    return links


def collect_links(csv_paths):
    """Return the de-duplicated CDN URLs found in the given CSV files.

    Files that cannot be read or parsed are reported and skipped instead of
    aborting the whole extraction. First-seen order is preserved.
    """
    links = []
    for csv_path in csv_paths:
        try:
            # newline="" is what the csv module expects; utf-8-sig also
            # accepts plain UTF-8 and drops a leading BOM if present.
            with open(csv_path, newline="", encoding="utf-8-sig",
                      errors="replace") as handle:
                links.extend(extract_cdn_links(csv.reader(handle)))
        except (OSError, csv.Error) as err:
            print("Could not read " + csv_path + " : " + str(err))
    return list(dict.fromkeys(links))  # dict keeps insertion order


def guess_extension(url):
    """Return the lower-cased file extension of *url*, or ".png" if none.

    Query string and fragment are ignored. Extensions that are empty, too
    long or contain non-alphanumeric characters fall back to ".png".
    """
    path = url.split("?", 1)[0].split("#", 1)[0]
    ext = os.path.splitext(path)[1].lower()
    if 1 < len(ext) <= 6 and ext[1:].isalnum():
        return ext
    return DEFAULT_EXTENSION


def fetch_url(url):
    """Download *url* with ``requests`` and return the response body as bytes.

    Raises a ``requests`` exception on network failure, timeout or an HTTP
    error status, so that error pages are never saved as images.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response.content


def download_all(links, destination, fetch=None):
    """Save every URL in *links* into *destination* and return the success count.

    Files are named after their 0-based position in *links* plus the
    extension guessed from the URL. *fetch* is a callable ``url -> bytes``;
    it defaults to :func:`fetch_url`. A failed download is reported and
    skipped; the file is only created once its content has been fetched,
    so failures leave no empty files behind.
    """
    if not links:
        return 0
    if fetch is None:
        fetch = fetch_url
    os.makedirs(destination, exist_ok=True)
    total = len(links)
    saved = 0
    for index, url in enumerate(links):
        print("Downloading image : " + str(index) + " on " + str(total))
        file_name = os.path.join(destination, str(index) + guess_extension(url))
        try:
            content = fetch(url)
            with open(file_name, "wb") as out:
                out.write(content)
        except (OSError, ValueError):
            # requests.RequestException derives from IOError (OSError), so
            # network errors, timeouts and bad HTTP statuses all land here.
            print("Image with index N* " + str(index) + " could not be extracted ;-(")
        else:
            saved += 1
    return saved


def main():
    """Prompt for the paths, confirm, then extract and download all links."""
    print(SEPARATOR)
    print("Welcome to Discord DATAPACKAGE Extractor V1.0")
    print(SEPARATOR)
    extraction_path = input("Please enter the path for the image extraction (PLEASE BE CAREFULL where you download the files): ").strip()
    package_path = input("Please enter the path to your data package: ").strip()

    # An empty answer would otherwise turn into "/" (the filesystem root).
    if not extraction_path or not package_path:
        raise SystemExit("Both paths are required. Bye.")

    if extraction_path.endswith("/"):
        print("Path for extraction ok.")
    else:
        extraction_path = ensure_trailing_slash(extraction_path)
        print("Path for extraction ok. (Added / at the end of the path)")
    if package_path.endswith("/"):
        print("Path to data package ok.")
    else:
        package_path = ensure_trailing_slash(package_path)
        print("Path to data package ok. (Added / at the end of the path)")

    if not os.path.isdir(package_path):
        raise SystemExit("The data package path is not a folder : " + package_path)

    print(SEPARATOR)
    print("Here is the path for the package : " + package_path)
    print("Here is the path to the destination folder : " + extraction_path)
    print(SEPARATOR)

    # Keep asking until the user gives an explicit yes or no.
    while True:
        answer = input("Extraction and download of all images is going to begin. Continue ? y / n :").strip().lower()
        if answer == "n":
            print("Interrupted. Bye.")
            raise SystemExit
        if answer == "y":
            break
    print("Processing. . .")

    final_links = collect_links(find_csv_files(package_path))
    print(SEPARATOR)
    print("NUMBERS OF ELEMENTS TO DOWNLOAD", len(final_links))
    print(SEPARATOR)
    download_all(final_links, extraction_path)


if __name__ == "__main__":
    main()