Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'master' of https://github.com/KevinOrtiz/An-lisis-Explo…
- Loading branch information
Showing
18 changed files
with
727 additions
and
134 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
import pandas as pd | ||
import dateutil.parser as dateparser | ||
import datetime | ||
import time | ||
|
||
|
||
def procesamiento_Timestamp():
    """Add time-derived columns to the tweet dataset and save the result.

    Reads ``tweets_depurated_noText.csv``, applies each helper below to the
    ``timestamstamp_ms`` column to build one new column per helper, and
    writes the enriched table to ``ArchivoCategorizadoTimestamp.csv``.
    """
    datos = pd.read_csv('tweets_depurated_noText.csv')
    marcas = datos['timestamstamp_ms']

    # (output column, converter) pairs, applied in this exact order.
    derivadas = [
        ('weekday', valorWeekDay),
        ('horas', valorHoras),
        ('day', valorDay),
        ('Sol_Weekend', valorDayWeekend),
        ('Fecha', Fecha),
        ('diaSemana', DiaSemana),
        ('HoraTweet', ZonaHoraria),
    ]
    for columna, conversor in derivadas:
        datos[columna] = marcas.apply(conversor)

    datos.to_csv('ArchivoCategorizadoTimestamp.csv')
|
||
def ZonaHoraria(x):
    """Return the time of day of a millisecond timestamp as ``HH:MM:SS``.

    ``x`` is divided by 1000 to obtain seconds and converted with
    ``datetime.fromtimestamp``, i.e. in the machine's local time zone.
    """
    return datetime.datetime.fromtimestamp(x / 1000.0).strftime("%H:%M:%S")
|
||
def DiaSemana(x):
    """Return the full weekday name (``%A``) of a millisecond timestamp.

    The timestamp is interpreted in the machine's local time zone.
    """
    momento = datetime.datetime.fromtimestamp(x / 1000.0)
    nombre_dia = momento.strftime('%A')
    return nombre_dia
|
||
def Fecha(x):
    """Return the calendar date (``datetime.date``) of a millisecond timestamp.

    The timestamp is interpreted in the machine's local time zone.
    """
    return datetime.datetime.fromtimestamp(x / 1000.0).date()
|
||
|
||
def valorWeekDay(x):
    """Return the weekday index of a millisecond timestamp.

    Uses ``datetime.weekday()``, so Monday is 0 and Sunday is 6. The
    timestamp is interpreted in the machine's local time zone.
    """
    return datetime.datetime.fromtimestamp(x / 1000.0).weekday()
|
||
|
||
def valorHoras(x):
    """Return the hour of day (0-23) of a millisecond timestamp.

    The timestamp is interpreted in the machine's local time zone.
    """
    return datetime.datetime.fromtimestamp(x / 1000.0).hour
|
||
|
||
def valorDay(x):
    """Return 1 when the timestamp's hour is between 7 and 18 inclusive, else 0.

    The hour is obtained from ``valorHoras``.
    """
    hora = valorHoras(x)
    return 1 if 7 <= hora <= 18 else 0
|
||
|
||
|
||
def valorDayWeekend(x):
    """Return 1 for a daytime timestamp on Friday, Saturday or Sunday, else 0.

    "Daytime" means an hour between 7 and 18 inclusive (from ``valorHoras``).
    The weekday comes from ``valorWeekDay`` (Monday = 0), so the accepted
    values 4, 5 and 6 are Friday, Saturday and Sunday.

    The per-call debug prints were removed: they ran once for every row of
    the dataset and used the Python 2-only ``print`` statement.
    """
    hours = valorHoras(x)
    weekday = valorWeekDay(x)

    es_dia = 7 <= hours <= 18
    es_weekend = weekday in (4, 5, 6)

    return 1 if es_dia and es_weekend else 0
|
||
|
||
# Script entry point: build the categorised timestamp CSV when run directly.
if __name__ == '__main__':
    procesamiento_Timestamp()
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
Voice - Setear el número de tweets por voice | ||
|
||
Cuantos usuarios hay en el dataset | ||
|
||
Filtrar tipos de usuario -> futuro | ||
|
||
Sacar el ranking por voice y número de tweets | ||
|
||
Hacer un top 5 | ||
|
||
Sacar el degree promedio del grafo | ||
|
||
Colocarle pesos a los arcos | ||
|
||
Filtrar stopwords en inglés | ||
|
||
Filtrar lugares que son de personas locales | ||
|
||
Filtrar lugares que son de personas internacionales | ||
|
||
Top lugares de mayor helpful comment |
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
import pandas as pd | ||
import string | ||
|
||
|
||
def _categoria_precio(precio):
    """Map a rental price to its category label.

    Buckets: above 300 'Expensive', above 200 'Normal', above 100
    'economics', anything else (including NaN) 'unexpensive'.
    """
    if precio > 300:
        return 'Expensive'
    if precio > 200:
        return 'Normal'
    if precio > 100:
        return 'economics'
    return 'unexpensive'


def categoriasAlquileres():
    """Label every rental in ``Alquileres.csv`` with a price category.

    Reads ``Alquileres.csv`` (first column as index), casts ``precio`` to
    float, adds a ``categorias`` column and writes the table to
    ``alquileres_categorias.csv``.

    The previous chained conditional used strict bounds on both sides of the
    middle buckets, so prices of exactly 300 or 200 matched no bucket and
    were labelled 'unexpensive'. They now fall into 'Normal' and 'economics'
    respectively; every other price keeps its previous label.
    """
    datos = pd.read_csv('Alquileres.csv', index_col=0)
    datos['precio'] = datos['precio'].astype(float)
    datos['categorias'] = datos['precio'].map(_categoria_precio)
    datos.to_csv('alquileres_categorias.csv')
|
||
|
||
# Script entry point: generate the categorised rentals CSV when run directly.
if __name__ == '__main__':
    categoriasAlquileres()
|
||
|
24 changes: 24 additions & 0 deletions
24
sourceCode/scrapyAlquileres/TransformarDatosAlquilerCsv.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
import json | ||
import csv | ||
import string | ||
def getDataJson():
    """Convert ``salidaAlquileres.json`` into ``Alquileres.csv``.

    Each JSON element must provide ``posicion`` (indexable, longitude first,
    latitude second), ``tituloLugar`` and ``precio``. One CSV row
    ``name,longitude,latitude,precio`` is written per element that has both
    coordinates; commas are removed from the name so it cannot break the
    column layout.

    Fixes over the previous version:
    * the header line now ends with a newline (the first data row used to be
      appended to the header line);
    * the output file is closed deterministically via ``with``;
    * rows are skipped when *either* coordinate is missing (previously only
      when both were missing, which wrote the literal text ``None``);
    * ``str.replace`` is used instead of the deprecated ``string.replace``.
    """
    with open('salidaAlquileres.json') as data_file:
        data_json = json.load(data_file)

    with open('Alquileres.csv', 'w') as csv_alquileres:
        csv_alquileres.write('name,longitude,latitude,precio\n')

        for element in data_json:
            location = element['posicion']
            lng = location[0]
            lat = location[1]
            if lng is None or lat is None:
                continue

            name = element['tituloLugar'].replace(',', '')
            precio = element['precio']
            csv_alquileres.write(
                '{0},{1},{2},{3}\n'.format(name, lng, lat, precio)
            )
|
||
# Script entry point: convert the scraped JSON to CSV when run directly.
if __name__ == '__main__':
    getDataJson()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
__author__ = 'cesar17' | ||
import HTMLParser | ||
import unicodedata | ||
import unidecode | ||
import string | ||
|
||
# Shared parser instance; the parsing helpers in this module call its
# unescape() method to decode HTML entities.
htmlparser = HTMLParser.HTMLParser()
|
||
def is_ascii(s):
    """Return True when every character of ``s`` has a code point below 128.

    An empty ``s`` yields True.
    """
    for caracter in s:
        if ord(caracter) >= 128:
            return False
    return True
|
||
def clean_parsed_string(string):
    """Return ``string`` reduced to plain ASCII, or None when it is empty.

    Non-ASCII text is NFKD-normalised and every character that still cannot
    be encoded as ASCII is dropped. ``None`` is treated like an empty string
    instead of raising on ``len(None)``.

    The encoded bytes are decoded back before the final ``str()`` call so
    the result is real text on Python 3 as well (``str(bytes)`` would yield
    a ``"b'...'"`` literal there); on Python 2 the result is unchanged.
    """
    if not string:
        return None

    ascii_string = string
    if not is_ascii(ascii_string):
        normalizado = unicodedata.normalize('NFKD', ascii_string)
        ascii_string = normalizado.encode('ascii', 'ignore').decode('ascii')
    return str(ascii_string)
|
||
def get_parsed_string(selector, xpath):
    """Extract the first match of ``xpath`` and return it as cleaned text.

    ``selector.xpath(xpath).extract()`` must return a sequence of strings.
    The first item is stripped, transliterated with ``unidecode``, has
    newlines and double quotes removed and commas replaced by spaces, and is
    finally passed through ``htmlparser.unescape``. An empty string is
    returned when nothing matches.

    Changes: the ``is not None`` check after ``.strip()`` could never fail
    and the second ``.strip()`` was a no-op, so both were dropped; the
    deprecated ``string.replace`` function is replaced by ``str.replace``.
    """
    extracted_list = selector.xpath(xpath).extract()
    if not extracted_list:
        return ''

    raw_string = unidecode.unidecode(extracted_list[0].strip())
    raw_string = raw_string.replace('\n', '')
    raw_string = raw_string.replace('"', '')
    raw_string = raw_string.replace(',', ' ')
    return htmlparser.unescape(raw_string)
|
||
def get_parsed_rating(selector, xpath):
    """Extract the first match of ``xpath`` and return the bare rating text.

    ``selector.xpath(xpath).extract()`` must return a sequence of strings.
    The first item is stripped, transliterated with ``unidecode``, has
    newlines and double quotes removed and commas replaced by spaces; the
    suffix text ``' de 5 estrellas'`` is then removed and the result is
    passed through ``htmlparser.unescape``. An empty string is returned when
    nothing matches.

    Changes: the ``is not None`` check after ``.strip()`` could never fail
    and the second ``.strip()`` was a no-op, so both were dropped; the
    deprecated ``string.replace`` function is replaced by ``str.replace``.
    """
    extracted_list = selector.xpath(xpath).extract()
    if not extracted_list:
        return ''

    raw_string = unidecode.unidecode(extracted_list[0].strip())
    raw_string = raw_string.replace('\n', '')
    raw_string = raw_string.replace('"', '')
    raw_string = raw_string.replace(',', ' ')
    raw_string = raw_string.replace(' de 5 estrellas', '')
    return htmlparser.unescape(raw_string)
|
||
def get_parsed_string_multiple(selector, xpath):
    """Extract every match of ``xpath`` and return the cleaned texts as a list.

    ``selector.xpath(xpath).extract()`` must return an iterable of strings.
    Falsy items are skipped; each remaining item is stripped, transliterated
    with ``unidecode`` and has newlines and double quotes removed. Commas
    are kept and no HTML unescaping is applied here.

    Changes: the unused ``return_string`` local was removed and the
    deprecated ``string.replace`` function is replaced by ``str.replace``.
    """
    list_reviews = []
    for review in selector.xpath(xpath).extract():
        if not review:
            continue
        raw_string = unidecode.unidecode(review.strip())
        raw_string = raw_string.replace('\n', '').replace('"', '')
        list_reviews.append(raw_string)

    return list_reviews
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.