# Globals

In [None]:
!pip install keras-tuner
!pip install scikit-plot

In [None]:
import numpy as np
import tensorflow as tf
import os

In [None]:
# Load the dataset used by Nacho for ML - Google Drive connection
# Import the Colab drive helper and mount Drive under /content/drive
from google.colab import drive
drive.mount('/content/drive')

project_id = 'angelic-cat-338018'
!gcloud config set project {project_id}

# Google Cloud connection: authenticate the current Colab user
from google.colab import auth
auth.authenticate_user()


Mounted at /content/drive
Updated property [core/project].


# Feature extraction para image quality classification

In [None]:
!pip install scipy
!pip install tqdm
!pip install scikit-image
!pip install imutils



In [None]:
!pip install opencv-python==3.4.0.14
!pip install opencv-contrib-python==3.4.2.17

Collecting opencv-python==3.4.0.14
  Downloading opencv-python-3.4.0.14.tar.gz (87.3 MB)
[K     |████████████████████████████████| 87.3 MB 91 kB/s 
Building wheels for collected packages: opencv-python
  Building wheel for opencv-python (setup.py) ... [?25lerror
[31m  ERROR: Failed building wheel for opencv-python[0m
[?25h  Running setup.py clean for opencv-python
Failed to build opencv-python
Installing collected packages: opencv-python
  Attempting uninstall: opencv-python
    Found existing installation: opencv-python 4.1.2.30
    Uninstalling opencv-python-4.1.2.30:
      Successfully uninstalled opencv-python-4.1.2.30
    Running setup.py install for opencv-python ... [?25l[?25herror
  Rolling back uninstall of opencv-python
  Moving to /usr/local/lib/python3.7/dist-packages/cv2/
   from /usr/local/lib/python3.7/dist-packages/~v2
  Moving to /usr/local/lib/python3.7/dist-packages/opencv_python-4.1.2.30.dist-info/
   from /usr/local/lib/python3.7/dist-packages/~pencv_python-

## Importación manual de la librería de image classification (la hemos retocado)

In [None]:
# -*- coding: utf-8 -*-
"""Image Features module.
This module is used to extract image features that can be later used for ad clicking prediction
  as in https://maths-people.anu.edu.au/~johnm/courses/mathdm/talks/dimitri-clickadvert.pdf .
Example:
        $ python example.py
"""
import os
import math
import numpy as np
import cv2
import imutils
import math
from scipy import ndimage
import skimage.segmentation
from skimage.feature import peak_local_max
from skimage.morphology import watershed
from tqdm import tqdm
from concurrent.futures import ProcessPoolExecutor, as_completed,ThreadPoolExecutor
import pandas as pd

def segment_image_watershed(image):
    """Segment an image with Otsu thresholding followed by a watershed.

    Args:
        image (numpy array): input colored image (converted with COLOR_BGR2GRAY)
    Returns:
        numpy array: watershed label image as returned by
        skimage.segmentation.watershed (restricted to the Otsu mask).
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
    # compute the exact Euclidean distance from every binary
    # pixel to the nearest zero pixel, then find peaks in this
    # distance map
    D = ndimage.distance_transform_edt(thresh)
    # `indices=False` was deprecated and later removed from peak_local_max.
    # Asking for peak coordinates (the default output) and painting them into a
    # boolean mask works on both old and new scikit-image releases.
    peak_coords = peak_local_max(D, min_distance=20, labels=thresh)
    localMax = np.zeros(D.shape, dtype=bool)
    localMax[tuple(peak_coords.T)] = True
    # 8-connected labelling so that touching peak pixels form a single marker
    markers = ndimage.label(localMax, structure=np.ones((3, 3)))[0]
    labels = skimage.segmentation.watershed(-D, markers, mask=thresh)
    return labels


def calculate_image_simplicity(image, c_threshold=0.01, nchannels=3, nbins=8):
    """Histogram-based simplicity features.

    Args:
        image (numpy array): input colored image
        c_threshold (float 0-1): fraction of a channel's histogram peak that a
            bin must reach to be counted in the first feature
        nchannels (int): number of leading channels to analyse (3 for colored
            images, 1 for grayscale)
        nbins (int): number of bins used to calculate each histogram
    Returns:
        tuple: (feature_1, feature_2) where feature_1 is the number of bins,
        summed over the analysed channels, whose count is at least
        c_threshold * that channel's peak, and feature_2 is the largest bin
        count over all analysed channels times 100 divided by image.size.
    """
    feature_1 = 0
    max_bin = -1
    for channel in range(nchannels):
        # calcHist expects a *list* of images. Passing the array itself makes
        # OpenCV treat every image row as a separate image, so the histogram
        # would cover a single row instead of the whole channel.
        hist = cv2.calcHist([image], [channel], None, [nbins], [0, 256]).ravel()
        maximum = hist.max()
        # Count over all nbins bins (the bin count used to be hard-coded to 8).
        feature_1 += int(np.count_nonzero(hist >= c_threshold * maximum))
        if max_bin < maximum:
            max_bin = maximum

    feature_2 = max_bin * 100.0 / image.size
    return feature_1, feature_2

def get_segmented_image(image, labels, segment_id):
    """Keep the pixels of one segment and zero out everything else.

    Args:
        image (numpy array): input colored image
        labels (numpy array): label image from segment_image_watershed
        segment_id (int): label value of the segment to keep
    Returns:
        numpy array: the image multiplied by a 0/1 uint8 mask that is 1 where
        labels equals segment_id.
    """
    belongs_to_segment = np.equal(labels, segment_id)
    selector = np.zeros(image.shape, dtype="uint8")
    selector[belongs_to_segment] = 1
    return np.multiply(image, selector)

def image_basic_segment_stats(image):
    """Watershed-segmentation statistics of an image.

    Args:
        image (numpy array): input colored image
    Returns:
        tuple: 10 values -- number of segments; size difference between the
        largest and the smallest segment; size of the largest and of the
        second-largest segment times 100 divided by image.flatten().shape[0]
        (these three are -1 when fewer than two segments were found); the three
        hue-histogram features and the first three brightness features of the
        image restricted to the largest segment.
    """
    labels = segment_image_watershed(image)
    # One of the unique label values is assumed to be the background.
    n_segments = len(np.unique(labels)) - 1

    sizes = [np.count_nonzero(labels == lbl) for lbl in range(1, n_segments + 1)]
    # Label of the first segment reaching the maximum size, or -1 without segments.
    largest_label = sizes.index(max(sizes)) + 1 if sizes else -1

    ordered = sorted(sizes)
    if len(ordered) < 2:
        contrast_segments_size = -1
        ratio_largest_component = -1
        ratio_second_largest_component = -1
    else:
        total_values = image.flatten().shape[0]
        contrast_segments_size = ordered[-1] - ordered[0]
        ratio_largest_component = ordered[-1] * 100.0 / total_values
        ratio_second_largest_component = ordered[-2] * 100 / total_values

    largest_segment = get_segmented_image(image, labels, largest_label)
    hue_1, hue_2, hue_3 = image_hue_histogram(largest_segment)
    bright_1, bright_2, bright_3, _ = image_brightness(largest_segment)
    return (
        n_segments,
        contrast_segments_size,
        ratio_largest_component,
        ratio_second_largest_component,
        hue_1,
        hue_2,
        hue_3,
        bright_1,
        bright_2,
        bright_3,
    )

def image_face_feats(image):
    """Estimate the number of faces with OpenCV Haar cascades.

    Args:
        image (numpy array): input colored image
    Returns:
        number: the larger of the detected face count and half the detected
        eye count (the latter may be fractional).
    """
    try:
        base_dir = os.path.dirname(os.path.realpath(__file__))
    except NameError:
        # __file__ does not exist when this code is pasted into a notebook
        # cell; fall back to the working directory.
        base_dir = os.getcwd()
    model_dir = os.path.join(base_dir, 'models')

    def _load_cascade(file_name):
        # Prefer the project's own models/ folder; if the file is missing, use
        # the cascades shipped with the OpenCV wheel when that attribute exists.
        path = os.path.join(model_dir, file_name)
        if not os.path.isfile(path):
            bundled_dir = getattr(getattr(cv2, 'data', None), 'haarcascades', None)
            if bundled_dir:
                path = os.path.join(bundled_dir, file_name)
        return cv2.CascadeClassifier(path)

    face_cascade = _load_cascade('haarcascade_frontalface_default.xml')
    eye_cascade = _load_cascade('haarcascade_eye.xml')
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, 1.3, 5)
    eyes = eye_cascade.detectMultiScale(gray)
    # Two eyes count as one face; use whichever estimate is larger.
    if (len(eyes) / 2) > len(faces):
        nfaces = len(eyes) / 2
    else:
        nfaces = len(faces)
    return nfaces

def image_sift_feats(image):
    """Count SIFT keypoints in an image.

    Args:
        image (numpy array): input colored image
    Returns:
        int: number of keypoints detected by SIFT on the grayscale image
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Newer OpenCV builds expose SIFT in the main module; older ones only have
    # it in the contrib `xfeatures2d` module. Use whichever is present so the
    # function does not depend on one specific OpenCV build.
    if hasattr(cv2, 'SIFT_create'):
        sift = cv2.SIFT_create()
    else:
        sift = cv2.xfeatures2d.SIFT_create()
    kp = sift.detect(gray, None)
    return len(kp)

def image_rgb_simplicity(image):
    """Simplicity features of the image in its original color channels.

    Args:
        image (numpy array): input colored image
    Returns:
        tuple: result of calculate_image_simplicity with its default settings
    """
    simplicity_features = calculate_image_simplicity(image)
    return simplicity_features

def image_hsv_simplicity(image):
    """Simplicity features of the first HSV channel.

    Args:
        image (numpy array): input colored image (converted with COLOR_BGR2HSV)
    Returns:
        tuple: result of calculate_image_simplicity on the HSV image using a
        0.05 threshold, 1 channel and 20 bins
    """
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    return calculate_image_simplicity(hsv, c_threshold=0.05, nchannels=1, nbins=20)

def image_hue_histogram(image):
    """Features derived from a 20-bin histogram of the hue channel.

    Args:
        image (numpy array): input colored image (converted with COLOR_BGR2HSV)
    Returns:
        tuple: (number of bins whose count is at least 1% of the peak bin,
        peak bin count minus the largest count among the bins below the peak,
        standard deviation of the hue channel)
    """
    n_bins = 20
    c_threshold = 0.01
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    (H, S, V) = cv2.split(hsv.astype("float"))
    # calcHist expects a *list* of images; passing the array itself makes
    # OpenCV histogram a single image row instead of the whole hue channel.
    hist = cv2.calcHist([hsv], [0], None, [n_bins], [0, 256]).ravel()
    maximum = hist.max()
    feature_1 = int(np.count_nonzero(hist >= c_threshold * maximum))

    # Largest count among the bins that are not at the peak. When every bin
    # equals the peak there is no such bin and -1 is used, as before; this case
    # used to raise because a scalar was indexed with [0].
    below_peak = hist[hist != maximum]
    max_2 = below_peak.max() if below_peak.size else -1
    feature_2 = maximum - max_2

    return feature_1, feature_2, np.std(H)


def image_grayscale_simplicity(image):
    """Simplicity features of the grayscale version of an image.

    Args:
        image (numpy array): input colored image (converted with COLOR_BGR2GRAY)
    Returns:
        tuple: (number of non-empty bins of the 256-bin histogram after pruning
        its two ends, number of bins whose count is at least 1% of the peak
        bin, standard deviation of the gray levels)
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    std = np.std(gray)
    # calcHist expects a *list* of images; passing the array itself makes
    # OpenCV histogram a single image row instead of the whole image.
    hist = cv2.calcHist([gray], [0], None, [256], [0, 256]).ravel()
    maximum = hist.max()
    c_threshold = 0.01
    feature_2 = int(np.count_nonzero(hist >= c_threshold * maximum))
    # Drop 2.5% of the 255 range at each end. The slice bounds are kept exactly
    # as before: bins [prune, 255 - prune) are retained.
    prune = int((2.5 * 1.0 * 255) / 100)
    features_1 = int(np.count_nonzero(hist[prune:255 - prune] > 0))

    return features_1, feature_2, std

def image_sharpness(image):
    """Sharpness measured as the variance of the Laplacian.

    Args:
        image (numpy array): input colored image (converted with COLOR_BGR2GRAY)
    Returns:
        variance of the 64-bit float Laplacian of the grayscale image
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    laplacian = cv2.Laplacian(gray, cv2.CV_64F)
    return laplacian.var()


def image_contrast(image):
    """Contrast of the luma channel.

    The image used to be converted to HSV while its first channel was treated
    as "Y", so the measure was computed on hue. It is now converted to YUV, in
    line with image_brightness, so the first channel really is luma.

    Args:
        image (numpy array): input colored image (converted with COLOR_BGR2YUV)
    Returns:
        float: standard deviation of Y divided by its value range, or 0.0 when
        the channel is constant
    """
    yuv = cv2.cvtColor(image, cv2.COLOR_BGR2YUV)
    (Y, U, V) = cv2.split(yuv.astype("float"))
    value_range = Y.max() - Y.min()
    if value_range <= 0:
        # Flat channel: avoid dividing by zero.
        return 0.0
    return np.std(Y) * 1.0 / value_range

def image_saturation(image):
    """Summary statistics of the saturation channel.

    Args:
        image (numpy array): input colored image (converted with COLOR_BGR2HSV)
    Returns:
        tuple: (mean, standard deviation, maximum, minimum) of the S channel
    """
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    _, saturation, _ = cv2.split(hsv.astype("float"))
    return (
        np.mean(saturation),
        np.std(saturation),
        saturation.max(),
        saturation.min(),
    )

def image_brightness(image):
    """Summary statistics of the luma channel.

    Args:
        image (numpy array): input colored image (converted with COLOR_BGR2YUV)
    Returns:
        tuple: (mean, standard deviation, maximum, minimum) of the Y channel
    """
    yuv = cv2.cvtColor(image, cv2.COLOR_BGR2YUV)
    luma, _, _ = cv2.split(yuv.astype("float"))
    return (
        np.mean(luma),
        np.std(luma),
        luma.max(),
        luma.min(),
    )


def image_colorfulness(image):
    """Colorfulness metric built from two opponent-color channels.

    Args:
        image (numpy array): input colored image, channels unpacked as B, G, R
    Returns:
        float: sqrt(std_rg^2 + std_yb^2) + 0.3 * sqrt(mean_rg^2 + mean_yb^2),
        with rg = |R - G| and yb = |0.5 * (R + G) - B|
    """
    blue, green, red = cv2.split(image.astype("float"))

    # opponent channels
    red_green = np.absolute(red - green)
    yellow_blue = np.absolute(0.5 * (red + green) - blue)

    rg_mean, rg_std = np.mean(red_green), np.std(red_green)
    yb_mean, yb_std = np.mean(yellow_blue), np.std(yellow_blue)

    # combine spreads and means of both opponent channels
    std_root = np.sqrt((rg_std ** 2) + (yb_std ** 2))
    mean_root = np.sqrt((rg_mean ** 2) + (yb_mean ** 2))

    return std_root + (0.3 * mean_root)

def parallel_process(array, function, n_jobs=3, use_kwargs=False, front_num=1):
    """
        A parallel version of the map function with a progress bar.
        Args:
            array (array-like): An array to iterate over.
            function (function): A python function to apply to the elements of array
            n_jobs (int, default=3): The number of worker threads to use
            use_kwargs (boolean, default=False): Whether to consider the elements of array as dictionaries of
                keyword arguments to function
            front_num (int, default=1): The number of iterations to run serially before kicking off the parallel job.
                Useful for catching bugs. Values <= 0 disable the serial warm-up.
        Returns:
            [function(array[0]), function(array[1]), ...] in input order. Items whose
            call raised inside the thread pool are reported and left out, so the
            result can be shorter than the input.
    """
    def _apply(item):
        return function(**item) if use_kwargs else function(item)

    # `front` must always exist: it used to be undefined (NameError on return)
    # whenever front_num was 0 or negative.
    front_num = max(front_num, 0)
    #We run the first few iterations serially to catch bugs
    front = [_apply(a) for a in array[:front_num]]
    remaining = array[front_num:]
    #If we set n_jobs to 1, just run a list comprehension. This is useful for benchmarking and debugging.
    if n_jobs == 1:
        return front + [_apply(a) for a in tqdm(remaining)]
    #Assemble the workers
    with ThreadPoolExecutor(max_workers=n_jobs) as pool:
        futures = [pool.submit(_apply, a) for a in remaining]
        kwargs = {
            'total': len(futures),
            'unit': 'it',
            'unit_scale': True,
            'leave': True
        }
        #Print out the progress as tasks complete
        for _ in tqdm(as_completed(futures), **kwargs):
            pass
    out = []
    #Get the results from the futures (all finished by now, so no second progress bar).
    for i, future in enumerate(futures):
        try:
            out.append(future.result())
        except Exception as e:
            # Best effort: keep going, but say which input failed.
            print("parallel_process: item {} failed: {!r}".format(front_num + i, e))
    return front + out

def get_image_all_feats(image_path, img_width=256):
    """Extract every feature of this module for one image file.

    Args:
        image_path (str): path of the image to read
        img_width (int): width to which the image is resized
    Returns:
        tuple: the file name followed by all features from the functions above
        (33 values in total)
    Raises:
        ValueError: if the file cannot be read as an image
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None; fail with the
        # offending path instead of an AttributeError deep inside the resize.
        raise ValueError("Could not read image: {}".format(image_path))
    image = imutils.resize(image, width=img_width)

    (n_segments, contrast_segments_size, ratio_largest_component,
     ratio_second_largest_component, segment_hue_1, segment_hue_2, segment_hue_3,
     segment_bright_1, segment_bright_2, segment_bright_3) = image_basic_segment_stats(image)
    # Face counting is disabled:
    #n_faces = image_face_feats(image)
    n_sift = image_sift_feats(image)
    rgb_simple_1, rgb_simple_2 = image_rgb_simplicity(image)
    hsv_simple_1, hsv_simple_2 = image_hsv_simplicity(image)
    gray_simple_1, gray_simple_2, gray_simple_3 = image_grayscale_simplicity(image)
    hue_hist_1, hue_hist_2, hue_hist_3 = image_hue_histogram(image)
    sharpness = image_sharpness(image)
    contrast = image_contrast(image)
    colorful = image_colorfulness(image)
    sat_1, sat_2, sat_3, sat_4 = image_saturation(image)
    bright_1, bright_2, bright_3, bright_4 = image_brightness(image)

    # The order must match the column list used in extract_image_feats.
    return (
        os.path.basename(image_path),
        n_segments, contrast_segments_size, ratio_largest_component, ratio_second_largest_component,
        segment_hue_1, segment_hue_2, segment_hue_3,
        segment_bright_1, segment_bright_2, segment_bright_3,
        n_sift, rgb_simple_1, rgb_simple_2, hsv_simple_1, hsv_simple_2,
        hue_hist_1, hue_hist_2, hue_hist_3, gray_simple_1, gray_simple_2, gray_simple_3,
        sharpness, contrast, colorful,
        sat_1, sat_2, sat_3, sat_4, bright_1, bright_2, bright_3, bright_4,
    )

def extract_image_feats(out_name, file_list, n_jobs=3):
    """Extract the features of many images and save them as a CSV file.

    Args:
        out_name (str): name of the output file
        file_list (list): list of input image paths
        n_jobs (int): passed to parallel_process
    Returns:
        None. Writes to out_name a table with the image name and all extracted
        features, one row per processed image, without the index.
    """
    column_names = [
        'image',
        'n_segments', 'contrast_segments_size',
        'ratio_largest_component', 'ratio_second_largest_component',
        'segment_hue_1', 'segment_hue_2', 'segment_hue_3',
        'segment_bright_1', 'segment_bright_2', 'segment_bright_3',
        'n_sift',
        'rgb_simple_1', 'rgb_simple_2',
        'hsv_simple_1', 'hsv_simple_2',
        'hue_hist_1', 'hue_hist_2', 'hue_hist_3',
        'gray_simple_1', 'gray_simple_2', 'gray_simple_3',
        'sharpness', 'contrast', 'colorful',
        'sat_1', 'sat_2', 'sat_3', 'sat_4',
        'bright_1', 'bright_2', 'bright_3', 'bright_4',
    ]
    rows = parallel_process(file_list, get_image_all_feats, n_jobs=n_jobs)
    feature_table = pd.DataFrame(rows, columns=column_names)
    feature_table.to_csv(out_name, index=False)
 


## Feature Extraction

In [None]:
# Fetch the dataset archive from the Cloud Storage bucket via Keras' get_file
# (untar=True asks it to extract the archive); data_dir holds the returned path.
dataset_url = "https://storage.googleapis.com/tfmbucket1/images/train/Dataset.tar"
data_dir = tf.keras.utils.get_file('Dataset', origin=dataset_url, untar=True)
# Disabled: conversion of data_dir to a pathlib.Path (later cells build paths from the string)
#data_dir = pathlib.Path(data_dir)

Downloading data from https://storage.googleapis.com/tfmbucket1/images/train/Dataset.tar


In [None]:
# One list of image paths per sub-folder of data_dir.
# Entries are sorted because os.listdir returns them in arbitrary order, which
# would make the folder index used in the output file names non-reproducible.
fullimglist = []

for folder in sorted(os.listdir(data_dir)):
  fldir = os.path.join(data_dir, folder)
  if not os.path.isdir(fldir):
    # A stray file next to the folders would make os.listdir(fldir) fail.
    continue
  fullimglist.append([os.path.join(fldir, image_name) for image_name in sorted(os.listdir(fldir))])

In [None]:
# Bounded thread count: asking for 10000 workers lets the pool start thousands
# of threads for a CPU-heavy task.
n_threads = min(32, (os.cpu_count() or 1) + 4)

# Iterate over the folders that actually exist instead of a hard-coded 14.
for i, image_paths in enumerate(fullimglist):
  filename = "Image_Dataset_with_Features_" + str(i) + ".txt"
  extract_image_feats(filename, image_paths, n_threads)

In [None]:
# Copy the features file with index 11 from the Colab filesystem to the Cloud Storage bucket
!gsutil cp -r /content/Image_Dataset_with_Features_11.txt gs://tfmbucket1/files/Image_Dataset_with_Features_11.txt

Copying file:///content/Image_Dataset_with_Features_11.txt [Content-Type=text/plain]...
-
Operation completed over 1 objects/18.1 MiB.                                     


In [None]:
# Pasar del cloud a Colab
!gsutil -m cp gs://tfmbucket1/files/Image_Dataset_with_Features_0.txt  /content/Image_Dataset_with_Features_0 
!gsutil -m cp gs://tfmbucket1/files/Image_Dataset_with_Features_1.txt  /content/Image_Dataset_with_Features_1 
!gsutil -m cp gs://tfmbucket1/files/Image_Dataset_with_Features_2.txt  /content/Image_Dataset_with_Features_2 
!gsutil -m cp gs://tfmbucket1/files/Image_Dataset_with_Features_3.txt  /content/Image_Dataset_with_Features_3 
!gsutil -m cp gs://tfmbucket1/files/Image_Dataset_with_Features_4.txt  /content/Image_Dataset_with_Features_4 
!gsutil -m cp gs://tfmbucket1/files/Image_Dataset_with_Features_5.txt  /content/Image_Dataset_with_Features_5 
!gsutil -m cp gs://tfmbucket1/files/Image_Dataset_with_Features_6.txt  /content/Image_Dataset_with_Features_6 
!gsutil -m cp gs://tfmbucket1/files/Image_Dataset_with_Features_7.txt  /content/Image_Dataset_with_Features_7 
!gsutil -m cp gs://tfmbucket1/files/Image_Dataset_with_Features_8.txt  /content/Image_Dataset_with_Features_8 
!gsutil -m cp gs://tfmbucket1/files/Image_Dataset_with_Features_9.txt  /content/Image_Dataset_with_Features_9 
!gsutil -m cp gs://tfmbucket1/files/Image_Dataset_with_Features_10.txt  /content/Image_Dataset_with_Features_10 
!gsutil -m cp gs://tfmbucket1/files/Image_Dataset_with_Features_11.txt  /content/Image_Dataset_with_Features_11 
!gsutil -m cp gs://tfmbucket1/files/Image_Dataset_with_Features_12.txt  /content/Image_Dataset_with_Features_12 
!gsutil -m cp gs://tfmbucket1/files/Image_Dataset_with_Features_13.txt  /content/Image_Dataset_with_Features_13 

Copying gs://tfmbucket1/files/Image_Dataset_with_Features_0.txt...
/ [1/1 files][  2.1 MiB/  2.1 MiB] 100% Done                                    
Operation completed over 1 objects/2.1 MiB.                                      
Copying gs://tfmbucket1/files/Image_Dataset_with_Features_1.txt...
- [1/1 files][ 34.3 MiB/ 34.3 MiB] 100% Done                                    
Operation completed over 1 objects/34.3 MiB.                                     
Copying gs://tfmbucket1/files/Image_Dataset_with_Features_2.txt...
- [1/1 files][ 22.2 MiB/ 22.2 MiB] 100% Done                                    
Operation completed over 1 objects/22.2 MiB.                                     
Copying gs://tfmbucket1/files/Image_Dataset_with_Features_3.txt...
/ [1/1 files][  2.3 MiB/  2.3 MiB] 100% Done                                    
Operation completed over 1 objects/2.3 MiB.                                      
Copying gs://tfmbucket1/files/Image_Dataset_with_Features_4.txt...
\ [1/1 files]

In [None]:
# Combine the TXT files into one: start by listing the regular files in /content
import os
content_dir = "/content"
# isfile must be given the full path; a bare name is resolved against the
# current working directory, which only happens to work when that is /content.
filename_list = [f for f in os.listdir(content_dir) if os.path.isfile(os.path.join(content_dir, f))]
filename_list

In [None]:
import os
# Show the full path of every /content entry whose name starts with "Image"
image_entries = [name for name in os.listdir("/content") if name.startswith("Image")]
for name in image_entries:
    print(os.path.join("/content", name))

/content/Image_Dataset_with_Features_3
/content/Image_Dataset_with_Features_1
/content/Image_Dataset_with_Features_13
/content/Image_Dataset_with_Features_5
/content/Image_Dataset_with_Features_2
/content/Image_Dataset_with_Features_7
/content/Image_Dataset_with_Features_0
/content/Image_Dataset_with_Features_12
/content/Image_Dataset_with_Features_8
/content/Image_Dataset_with_Features_10
/content/Image_Dataset_with_Features_4
/content/Image_Dataset_with_Features_6
/content/Image_Dataset_with_Features_11
/content/Image_Dataset_with_Features_9


In [None]:
# Image_Dataset_with_Features_Total is the file that contains the features of all 578745 images

In [None]:
from google.colab import files
uploaded = files.upload()

file_name = "Image_Dataset_with_Features_Total.txt"
# Preview only the first lines: echoing the whole split file as the cell's
# output would dump every row of the dataset into the notebook.
uploaded[file_name].decode("utf-8").split("\r\n")[:5]

Saving Image_Dataset_with_Features_Total.txt to Image_Dataset_with_Features_Total.txt


In [None]:
file_name = "Image_Dataset_with_Features_Total.txt"
# One list of comma-separated fields per line of the uploaded file
data = [line.split(",") for line in uploaded[file_name].decode("utf-8").split("\r\n")]

# Printing every row exceeded the notebook's IOPub data rate limit; show a sample.
print(data[:5])

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [None]:
import pandas as pd
import io

# Decode the uploaded bytes once and parse them as CSV. A second, discarded
# decode + StringIO of the whole payload was removed.
data2 = pd.read_csv(io.StringIO(uploaded["Image_Dataset_with_Features_Total.txt"].decode("utf-8")))

In [None]:
# Display the DataFrame parsed from Image_Dataset_with_Features_Total.txt
data2

Unnamed: 0,image,n_segments,contrast_segments_size,ratio_largest_component,ratio_second_largest_component,segment_hue_1,segment_hue_2,segment_hue_3,segment_bright_1,segment_bright_2,segment_bright_3,n_sift,rgb_simple_1,rgb_simple_2,hsv_simple_1,hsv_simple_2,hue_hist_1,hue_hist_2,hue_hist_3,gray_simple_1,gray_simple_2,gray_simple_3,sharpness,contrast,colorful,sat_1,sat_2,sat_3,sat_4,bright_1,bright_2,bright_3,bright_4
0,7bea094a54efcf37f76059cb933013b738de045de215af...,18,6455,3.283691,3.240967,11,500.0,18.432064,15.760803,49.193149,255.0,514,19,0.155640,7,0.109863,16,68.0,43.043332,70,72,51.366289,1378.400299,0.240466,30.421050,71.075439,51.015082,255.0,0.0,111.655334,51.366289,255.0,0.0
1,6d9bbdbdcf549c3555a8892783e5bb3f747e1ae1084995...,7,10169,5.176290,4.484049,4,104.0,23.862907,37.948608,88.610733,253.0,304,3,0.390625,2,0.130208,3,256.0,19.590166,1,1,70.374030,764.779358,0.118728,33.790031,52.975098,60.053323,255.0,0.0,155.839844,70.374030,254.0,3.0
2,103df08dce3ae12bff70d67aab241d8c85584dd6c17f19...,20,2204,1.123047,1.004028,1,768.0,15.257647,6.122650,33.459258,255.0,396,19,0.116984,8,0.064087,16,60.0,41.167887,81,81,49.215557,1217.984039,0.229988,24.998491,76.055115,53.896540,255.0,0.0,95.667236,49.215557,255.0,0.0
3,29c8c4dbb1eb5f44cdc354e3ca329e295c6c60f5d7265b...,5,27131,14.460754,8.842977,1,768.0,54.633903,88.562592,101.575306,243.0,243,12,0.182088,8,0.064087,13,10.0,25.082759,73,74,57.224767,867.440369,0.140127,8.957137,21.959473,18.756849,255.0,0.0,178.429504,57.224767,254.0,1.0
4,a2720fed454224e28c627596e10b6efa7ae2852046b53f...,16,3176,1.619466,1.367188,1,768.0,23.341232,7.571259,34.412286,247.0,398,12,0.158691,8,0.091553,18,108.0,43.427025,67,67,49.493505,939.925293,0.242609,43.965756,116.401672,39.562753,255.0,0.0,77.035400,49.493505,255.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
578739,9c88dc36124fe5784f1b0b56fb49196f56044d978a563b...,12,14208,7.228597,4.120382,10,282.0,42.896617,35.760498,68.728343,229.0,54,15,0.182088,6,0.081380,17,32.0,19.906354,63,63,58.646950,307.830353,0.117096,10.157104,45.866089,35.758925,186.0,0.0,94.153992,58.646950,229.0,7.0
578740,b91163214a1548c61bc44b2a4cc0ab2b418fd8cb5feb3f...,11,5051,2.583313,2.425130,1,768.0,4.524090,13.229004,46.001032,209.0,263,15,0.159709,7,0.110881,17,104.0,9.544983,41,41,50.745358,627.526306,0.053324,65.538673,183.912292,41.478530,255.0,0.0,105.838013,50.745358,212.0,0.0
578741,7bd106e5e5f929044a54e31d9dbdac7ebdeacc619d3a9a...,14,14491,7.388814,4.670715,1,768.0,11.131026,39.657425,74.541982,240.0,184,14,0.230916,2,0.123088,12,8.0,37.184862,46,46,43.224492,456.841888,0.207737,15.366801,28.211792,24.993975,226.0,0.0,147.169617,43.224492,254.0,11.0
578742,45fd69aede4414c5f6b5a4e4cab8de426070ee121d20d6...,21,17550,8.928426,4.519653,1,768.0,46.883124,46.736053,77.930282,236.0,399,15,0.192261,8,0.079346,15,24.0,25.684570,56,57,34.777617,906.393555,0.145111,27.530314,38.462769,44.419532,255.0,0.0,147.820679,34.777617,255.0,1.0


In [None]:
# Write straight to /content: the extra `cp` failed because source and
# destination were the same file. index=False avoids adding yet another
# unnamed index column next to the existing "Unnamed: 0".
data2.to_csv('/content/data.csv', index=False)

cp: 'data.csv' and '/content/data.csv' are the same file
