#Ejemplo que muestra como recolectar y preparar datos sobre Naves Imperiales de Star Wars


In [None]:
#@title Librerías

import sys, os, re, random
import urllib, time, base64
import urllib.parse
import string
import requests
import numpy as np

from google.colab import files


print("Librerías cargadas.")

Librerías cargadas.


#Recolectar Datos de Naves Imperiales

Fuente de datos: http://insd.swcombine.com/index.html

Atributos disponibles:
*  Model: The ship's class, name or model number.
*  Manufacturer: The ship's manufacturer or designer.
*  Designation: The ship's combat designation or classification.
* Length: The ship's length measured in meters.
* Crew: The ship's total number of crew members - excluding droids.
* Troops: The ship's total number of troops or passengers it can carry.
* Cargo Capacity: The ship's cargo capacity measured in kilograms or metric tons.
* Consumables: How long time the ship can operate without stopping to refuel or restock stores of food, water or air.
* Hyperdrive Multiplier: The ship's main hyperdrive class rating. This measures how quickly the ship travels a hyperspace route.
* Hyperdrive Backup: The ship's backup hyperdrive class rating. This measures how quickly the ship travels a hyperspace route if its main hyperdrive is disabled.
* Speed: The ship's sublight speed rating measured in MegaLights (MGLT).
* Hull: The ship's hull/armor durability rating measured in Resistance Units (RU).
* Shields: The ship's deflector shield durability rating measured in Shield Base Durability (SBD).
* Special Features: The ship's special design features.
* Weapons: The ship's standard armament listing.
* Onboard Craft: A listing of the ship's onboard support vessels. The listing is excluding all Barges and Tugs.

In [None]:
#@title Extraer datos disponibles en la página

# funciones para manejar campos pelicula

def strbuscarEntre(text, key1, key2):
  return strbuscarEntreList(text, [key1], [key2])

def strbuscarEntreList(text, listkey1, listkey2):
 p1 = -1
 for key1 in listkey1:
  auxP1 = text.find(key1)
  if (auxP1 > -1) and ((p1 == -1) or (auxP1 < p1)):
    p1 = auxP1
 if p1 == -1:
  return ""
 else:
  p1 = p1+len(key1)
  p2 = -1
  for key2 in listkey2:
    auxP2 = text[p1:].find(key2)
    if (auxP2 > -1) and ((p2 == -1) or (auxP2 < p2)):
      p2 = auxP2
  if p2 == -1:
    return ""
  else:
    p2 = p1+p2
  return text[p1:p2]

def limpiar(text):
  if text is None:
    return ""
  else:
    text = text.replace("&quot;", " ")
    text = text.replace("&amp;", " & ")
    text = text.replace("&nbsp;", "")
    text = text.replace("\r\n", "")
    text = text.replace("  ", " ")
    text = text.replace("  ", " ")
    return text.strip()


class INSDParser:

 def __init__(self, baseURL="http://insd.swcombine.com/insd/", debug=False):
  self.__debug = debug
  self.__baseURL = baseURL
  self.__AttNames = ["Model", "Manufacturer", "Length", "Crew", "Troops", "Cargo Capacity", "Consumables", "Hyperdrive Multiplier", "Hyperdrive Backup", "Speed", "Hull", "Shields", "Special Features", "Weapons", "Onboard Craft"]
  print("\n** Parser de 'IMPERIAL NAVY SHIP DATABASE' incializado ** \n")

 def __getHTMLContent(self, url):
  return requests.get(url).text

 def extractSiteLinks(self, searchURL):
  # ejecuta y obtiene la página
  searchData = self.__getHTMLContent(searchURL)
  # extrae los links
  auxList = re.findall('<A HREF=\"([\-\_a-zA-Z0-9]+\.htm)', searchData)
  if auxList is None:
    if self.__debug:
      print("\t -- No se encuentra links de Naves!!!")
    return None
  # saca duplicados
  linksList = list(set(auxList))
  if self.__debug:
    print("\t = lista de links (", len(linksList), "): ", linksList)
  # devuelve links de obtenidos
  return linksList, searchData

 def fetchNavesInfo(self, url, shipListData):
  # carga los datos de la página
  pagedData = self.__getHTMLContent(self.__baseURL + url)
  # obtiene datos
  matchStr = strbuscarEntre(pagedData, '<BLOCKQUOTE>', '</BLOCKQUOTE>')
  if (matchStr is None) or (matchStr == ""):
    print("\t -- No se puede encontrar datos en ", url)
    return None
  # extrae datos de la página
  naveData = {}
  # agrega nombre
  naveData["Name"] = limpiar(strbuscarEntre(pagedData, '<TITLE>', '</TITLE>'))
  # extra tipo de nave de la lista
  naveData["Ship Type"] = "???"
  posName = shipListData.find(url)
  if posName > 0:
      auxData = shipListData[:posName]
      posType = auxData.rfind('<B>')
      if posType > 0:
        auxData = shipListData[posType-1:posName]
        naveData["Ship Type"] = strbuscarEntre(auxData, '<B>', '</B>')
  # extrae lista atributos
  for att in self.__AttNames:
    auxStr = strbuscarEntre(matchStr, att+":", "<")
    if auxStr != "":
      naveData[att] = auxStr
  # limpia datos extraidos
  for k in naveData.keys():
    naveData[k] = limpiar(naveData[k])
  # agrega links
  naveData["URL_data"] =  self.__baseURL + url
  naveData["URL_image"]  = self.__baseURL + 'Ship_files/' + strbuscarEntre(pagedData, '<IMG SRC=\"Ship_files/', '"')
  return naveData


 def fetchAllNavesInfo(self, url="link.htm"):
  # carga los datos de la página
  linksList, shipListData = self.extractSiteLinks( self.__baseURL + url)
  if linksList is None:
    return None
  navesList = []
  for urlNave in linksList:
    if urlNave not in ["other2.htm", "techspec.htm"]:
      # busca la información de la nave
      naveInfo = self.fetchNavesInfo(urlNave, shipListData)
      if naveInfo is not None:
        navesList.append(naveInfo)
  return navesList


mostar_detalle = True #@param {type:"boolean"}

# busca y recolecta
parser = INSDParser(debug=mostar_detalle)
navesList = parser.fetchAllNavesInfo()

# muestra
if navesList is not None:
  print("")
  print("Se recolectaron ", len(navesList), " naves.")
  print("")
  if mostar_detalle:
    for nave in navesList:
      for k in nave.keys():
        print("  " + k + " = " + nave[k])
      print("")


** Parser de 'IMPERIAL NAVY SHIP DATABASE' incializado ** 

	 = lista de links ( 109 ):  ['guardian.htm', 'tieaggre.htm', 'other2.htm', 'kappa.htm', 'dsd.htm', 'corvet.htm', 'isd2.htm', 'lawclass.htm', 'escshut.htm', 'tiestrcr.htm', 'dungeon.htm', 'toscanf.htm', 'dropship.htm', 'tiephantom.htm', 'tiesub2.htm', 'tieexm4.htm', 'tievan.htm', 'tieexm3.htm', 'tieintrd.htm', 'tiehbomb.htm', 'y4raptor.htm', 'lamshut.htm', 'tiefighter.htm', 'esccar.htm', 'xmmiss.htm', 'tiedroid.htm', 'assshu2.htm', 'tieexm1.htm', 'tiefc.htm', 'shadowdr.htm', 'vsd2.htm', 'isp.htm', 'tiegt.htm', 'Acclamator.htm', 'tieexm2.htm', 'tierc.htm', 'assincor.htm', 'tieboat.htm', 'eclip.htm', 'tiebomber.htm', 'tierpt2.htm', 'svelte.htm', 'techspec.htm', 'tiehunt.htm', 'impcusve.htm', 'ipv1.htm', 'vensd.htm', 'tiest.htm', 'tiecrawl.htm', 'impcusfr.htm', 'tiechiss.htm', 'tiesub.htm', 'dread.htm', 'gunship.htm', 'tiercold.htm', 'isd1.htm', 'tiex2.htm', 'dominator.htm', 'decimator.htm', 'tiex1.htm', 'tiebt.htm', 'tiev38.htm

#Revisar Datos de Naves Imperiales

In [None]:
#@title Crear un DataFrame de los datos recolectados
import pandas as pd

df = pd.DataFrame.from_dict(navesList)

display( df )

Unnamed: 0,Name,Ship Type,Model,Manufacturer,Length,Crew,Troops,Cargo Capacity,Consumables,Hyperdrive Multiplier,Hyperdrive Backup,Speed,Hull,Shields,Weapons,URL_data,URL_image,Special Features,Onboard Craft
0,Guardian Class Cruiser,Patrol Craft,Guardian Class Cruiser,Sienar Fleet Systems,42 meters,20,None (+10 prisoners),200 metric tons,3 months,x1,x10,75 MGLT,88 RU,120 SBD,2 Laser Cannons and 2 Laser Cannon Turrets.,http://insd.swcombine.com/insd/guardian.htm,http://insd.swcombine.com/insd/Ship_files/GUAR...,,
1,TIE Aggressor,TIE Fighters,TIE/agg Fighter,Santhe / Sienar Technologies,7.8 meters,2,,150 kilograms,5 days,x4,,95 MGLT,15 RU,21 SBD,"2 Medium Laser Cannons, 2 Concussion Missile L...",http://insd.swcombine.com/insd/tieaggre.htm,http://insd.swcombine.com/insd/Ship_files/TIEA...,,
2,Kappa Class Landing Craft,Landing Craft,Kappa Class Landing Craft,Republic Sienar Systems,35 meters,4,40 + 10 AT-PTs,50 metric tons,1 month,x1,x10,71 MGLT,25 RU,60 SBD,1 Double Blaster Cannon Turret and 2 Double Bl...,http://insd.swcombine.com/insd/kappa.htm,http://insd.swcombine.com/insd/Ship_files/Kapp...,,
3,Death Star Defender,Other Starfighters,Death Star Defender,CPG Space Products,5 meters,1,,50 kilograms,1 week,,,110 MGLT (90 MGLT with attack positioned wings),7 RU,10 SBD,2 Laser Cannons,http://insd.swcombine.com/insd/dsd.htm,http://insd.swcombine.com/insd/Ship_files/DSD.JPG,Adjustable Attack Wing design.,
4,Corellian CR90 Corvette,Medium Ships,Corellian CR90 Corvette,Corellian Engineering Corporation,153 meters,30-165 (depending upon configuration),1-600 (depending upon configuration),"3,000 metric tons",1 year,x2,,22 MGLT,188 RU,400 SBD,2 Double Turbolaser Cannons and 4 Turbolaser C...,http://insd.swcombine.com/insd/corvet.htm,http://insd.swcombine.com/insd/Ship_files/CORC...,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
102,Adz Class Destroyer,Medium Ships,Ads Class Destroyer,Imperial Shipyards,150 meters,36,8,400 metric tons,6 months,x0.55,x12,83 MGLT,162 RU,600 SBD,3 Quad Laser Cannon and 3 Ion Cannon Batteries.,http://insd.swcombine.com/insd/adzclass.htm,http://insd.swcombine.com/insd/Ship_files/NOPI...,,6 TIE Series Fighters.
103,Interdictor Class Cruiser,Heavy Ships,Interdictor Class Cruiser,Sienar Fleet Systems,598 meters,2807,80,"5,500 metric tons",1.2 years,x2,x8,8 MGLT,960 RU,"2,240 SBD",20 Quad Laser Cannons and 4 Gravity Well Proje...,http://insd.swcombine.com/insd/interdictor.htm,http://insd.swcombine.com/insd/Ship_files/Inte...,Four Gravity Well Projectors,12 TIE Series Fighters and 4 Light Transports.
104,Delta Class Dx-9s Transport,Transporters,Delta Class Dx-9s Transport,Telgorn Corporation,18 meters,3-5,30-40 (Depending on configuration),100 metric tons,1 week,x2,x18,56 MGLT,78 RU,114 SBD,2 Laser Cannons and 1 Hull-Cutting Airlock.,http://insd.swcombine.com/insd/deltrn2.htm,http://insd.swcombine.com/insd/Ship_files/STMT...,,
105,Scimitar Assault Bomber,TIE Bombers,Scimitar Assault Bomber,Sienar Fleet Systems,13.8 meters,2,,200 kilograms,2 days,,,90 MGLT,34 RU,30 SBD,2 Laser Cannons and 16 Concussion Missile Laun...,http://insd.swcombine.com/insd/tiescimi.htm,http://insd.swcombine.com/insd/Ship_files/TIES...,,


In [None]:
#@title Mostrar Estadísticas de datos recolectados


# variables auxiliares
atributo_clase = ""

# configura para que muestre todas las columnas y filas
pd.options.display.max_rows = 100
pd.options.display.max_columns = 100

# devuelve listas de columnas numéricas y no numéricas
def devolNombreColumnas(ndf):
  colValues = []
  colNoValues = []
  for col in ndf.columns:
    if ndf[col].dtypes in ("object", "bool"):
      colNoValues.append( col )
    else:
      colValues.append( col )
  return colValues, colNoValues

# función auxiliar para separar datos de entrada y de salida
def separarDatosXY(ndf, atributo_clase="", xSoloNros=True):
  # hace una copia auxiliar del data frame
  cdf = ndf.copy()
  # saca el atributo clase (OPCIONAL)
  if atributo_clase == "":
    Y = []
  else:
    # datos atributo clase
    Y = np.array( cdf.pop(atributo_clase).fillna("-NAN-") )
  if xSoloNros:
    # se queda sólo con los atributos numéricos (OPCIONAL)
    for col in cdf.columns:
      if cdf[col].dtypes == "object":
          cdf.pop( col )
  # datos de entrada
  X = np.array(cdf.fillna(0.001))
  return X, Y, np.array(cdf.columns)

def convColsNumericas(ndf, atributos_no_convertir = []):
  # hace una copia auxiliar del data frame
  cdf = ndf.copy()
  # convierte todas las no numéricas a numéricas (OPCIONAL)
  for col in cdf.columns:
    if col not in atributos_no_convertir:
      if cdf[col].dtypes == "object":
        # genera diccionario de valores
        valores = cdf[col].unique()
        diccValores = dict(zip(valores, range(len(valores))))
        # realiza el reemplazo
        cdf[col] = cdf[col].map(lambda s: diccValores.get(s) if s in diccValores else s)
  return cdf

# función auxiliar
def generar_estadisticas_detalladas(orDF, titulo=""):
  # título
  print("\n", titulo, ": ")
  # obtiene las estadísticas generales
  estDF = orDF.describe().transpose()
  #  genera y formatea las estadísticas
  if "min" in estDF and "max" in estDF:
    rangoValores = "[ " + estDF["min"].apply('{:.2f}'.format) + " ; " + estDF["max"].apply('{:.2f}'.format) + " ]"
  else:
    rangoValores = estDF["unique"].fillna(0.0).apply('{:.0f}'.format)
  rangoValores.name = "Rango Valores"
  # para campos no numéricos muestra las cantidades por valor
  for col in orDF.columns:
    if orDF[col].dtypes in ("object", "bool"):
      auxStr = str( orDF[col].value_counts() ).replace("\n", " ; ")
      if (auxStr.index("Name")-3) > 0:
        # saca lo del final porque no sirve
        auxStr = auxStr[:auxStr.index("Name")-3]
      rangoValores[col] = "{ " + auxStr + " }"
  if "mean" in estDF and "std" in estDF:
    promValores = estDF["mean"].fillna(0.0).apply('{:.3f}'.format) + " ± " + estDF["std"].fillna(0.0).apply('{:.3f}'.format)
  else:
    promValores = estDF["count"].apply('{:.0f}'.format)
  promValores.name = "Promedio ± Desvío"
  # obtiene valores "ceros" y nulos
  zero_val = (orDF == 0.00).astype(int).sum(axis=0)
  zero_val.name = "¿Valores Ceros?"
  mis_val = orDF.isnull().sum()
  mis_val.name = "¿Valores Nulos?"
  # prepara la nueva tabla para mostrar
  nTable = pd.concat([orDF.dtypes, rangoValores, promValores, zero_val, mis_val], axis=1)
  nTable = nTable.rename( columns = {0: 'Tipo Valor',  1: 'Rango Valores', 2: 'Promedio ± Desvío', 3: '¿Valores Ceros?', 4: '¿Valores Nulos?' } )
  # muestra la nueva tabla
  pd.set_option('max_colwidth', None)
  display(nTable.fillna("-"))
  print("Tiene " + str(orDF.shape[1]) + " atributos y " + str(orDF.shape[0]) + " ejemplos.")
  print("\n")
  return

# muestra las estadísticas
generar_estadisticas_detalladas(df, "> Estadísticas de los datos recolectados")



 > Estadísticas de los datos recolectados : 


Unnamed: 0,Tipo Valor,Rango Valores,Promedio ± Desvío,¿Valores Ceros?,¿Valores Nulos?
Name,object,{ TIE Defender 2 ; Victory I Class Star Destroyer 2 ; Guardian Class Cruiser 1 ; TIE Raptor 1 ; Gamma Class Assault Shuttle 1 ; .. ; TIE Ground Targeting 1 ; Imperial Shuttle Pod 1 ; Victory II Class Star Destroyer 1 ; SHD-66 Shadow Droid 1 ; AT Barge 1 },107,0,0
Ship Type,object,{ TIE Fighters 20 ; Medium Ships 15 ; Heavy Ships 14 ; Other Starfighters 10 ; TIE Support Craft 10 ; Patrol Craft 7 ; Landing Craft 7 ; Shuttles 6 ; TIE Bombers 6 ; TIE Experimental Craft 5 ; Transporters 4 ; Command Ships 3 },107,0,0
Model,object,"{ Guardian Class Cruiser 1 ; Mu-1, Mu-2 and Mu-3 Class Shuttle 1 ; Armored T-1a Shuttle 1 ; Galleon Transport 1 ; Gamma Class Assault Shuttle 1 ; .. ; Acclamator Class Transport 1 ; TIE/gt Fighter 1 ; ISP-6 1 ; Victory II Class Star Destroyer 1 ; AT Barge 1 }",107,0,0
Manufacturer,object,{ Sienar Fleet Systems 33 ; Kuat Drive Yards 12 ; Cygnus Spaceworks 8 ; Rendili StarDrive 7 ; Sienar Fleet Systems / Imperial Department of Military Research 6 ; Telgorn Corporation 4 ; Republic Sienar Systems 3 ; Mesens Corporation 3 ; Corellian Engineering Corporation 3 ; Santhe / Sienar Technologies 3 ; Telgorn Corporation/KonGar Ship Works 2 ; Silviut Corporation 2 ; Sienar Fleet Systems / Zsinj Development Incorporated 2 ; Incom Corporation 2 ; Meller & Dax 2 ; Byss Worx / Imperial Department of Military Research 1 ; Sienar Fleet Systems/Kuat Drive Yards 1 ; Loronar / Rendili StarDrive /Sienar Fleet Systems and Kuat Drive Yards 1 ; Damorian Manufacturing Corporation 1 ; Loronar 1 ; Cygnus Spaceworks / Sienar Fleet Systems 1 ; Tagge Industries Shipyards 1 ; CPG Space Products 1 ; SoroSuub Corporation / Cavrilhu Shipyards 1 ; Rothana Heavy Engineering 1 ; Sienar Fleet Systems / Shobquix Yards 1 ; Sinar Fleet Systems 1 ; Sienar Fleet Systems / Chiss Ascendancy 1 ; Rothana Heavy Engineering / Kuat Drive Yards 1 ; Imperial Shipyards 1 },107,0,0
Length,object,"{ 7.8 meters 12 ; 6.3 meters 11 ; 153 meters 3 ; 1,600 meters 3 ; 20 meters 3 ; 15 meters 3 ; 6.6 meters 3 ; 38 meters 2 ; 898 meters 2 ; 29 meters 2 ; 2.1 meters 2 ; 9.2 meters 2 ; 598 meters 2 ; 14.3 meters 2 ; 40 meters 2 ; 35 meters 2 ; 100 meters 1 ; 15,000 meters 1 ; 37 meters 1 ; 44 meters 1 ; 253 meters 1 ; 21 meters 1 ; 538 meters 1 ; 1,150 meters 1 ; 230 meters 1 ; 42 meters 1 ; 8.4 meters 1 ; 24 meters 1 ; 345 meters 1 ; 450 meters 1 ; 25 meters 1 ; 75 meters 1 ; 353 meters 1 ; 298 meters 1 ; 7.4 meters 1 ; 2,500-17,600 meters 1 ; 150 meters 1 ; 18 meters 1 ; 13.8 meters 1 ; 300 meters 1 ; 7.65 meters 1 ; 11.4 meters 1 ; 43 meters 1 ; 5 meters 1 ; 51 meters 1 ; 32 meters 1 ; 15.6 meters 1 ; 764 meters 1 ; 52.5 meters 1 ; 4.3 meters 1 ; 8.7 meters 1 ; 28 meters 1 ; 501 meters 1 ; 6.1 meters 1 ; 2.8 meters 1 ; 4 meters 1 ; 752 meters 1 ; 7 meters 1 ; 16,000 meters (Prototype Lenght: 17,500 meters) 1 ; 17 meters 1 ; 180 meters 1 ; 1,137 meters 1 ; 6.7 meters 1 ; 600 meters 1 ; 119 meters 1 ; 250 meters 1 ; 50 meters 1 }",107,0,0
Crew,object,"{ 1 42 ; 2 8 ; 4 3 ; 15 3 ; 8 2 ; 3 2 ; 12 2 ; 6 2 ; 3-5 2 ; 1 fully automated droid brain 2 ; 5 2 ; 37,085 2 ; 2,112 1 ; 920 1 ; 3,053 1 ; 1,120 1 ; 605,745 1 ; 45 1 ; 36 1 ; 3,065 1 ; 87,730-619,370 1 ; 1,600-3,700 (varies according to mission profile) 1 ; 200 1 ; 2-4 1 ; 150 1 ; 5,200 1 ; 275 1 ; 1,092 1 ; 20 1 ; 7,400 1 ; 850 1 ; Cyborg brain 1 ; 30-165 (depending upon configuration) 1 ; 4-6 (Depending on configuration) 1 ; 906 1 ; None. (Remote controlled ship) 1 ; 2-4 (Depending on configuration) 1 ; 3,505 1 ; 6,107 1 ; 24,724 1 ; 700 1 ; 46 1 ; 712,645 1 ; 58 1 ; 16,210 1 ; 91 1 ; 2,807 1 }",107,0,0
Troops,object,"{ 60 3 ; 8 3 ; 300 2 ; 9,700 2 ; 1 (trainer or priority personnel) 2 ; 40 2 ; 800 2 ; 40 (Zero-G Assault Stormtroopers) 2 ; 10 2 ; 20 2 ; None (+10 prisoners) 1 ; Mu-1: 24 / Mu-2: 14 / Mu-3: 40 1 ; 1-3,300 (varies according to mission profile) 1 ; 4,778 1 ; 0 or 320 (Troop lander configuration) 1 ; 2 (priority personnel) 1 ; 3 1 ; 30 (or 10 Zero-G troopers) 1 ; 150 1 ; 2,040 1 ; 75 1 ; 340 1 ; 10 + 4 (trooper squad + passengers) 1 ; 142 1 ; 11,875-83,838 1 ; 80 1 ; 130,100 1 ; 5,000 1 ; 2,389 1 ; 54 1 ; 1-600 (depending upon configuration) 1 ; 20 (+10 prisoners) 1 ; 1-24 (Depending on configuration) 1 ; 400 (security wardens), 8,000 (prisoners in standard cells), 1,000 (prisoners in special holding cells) 1 ; 160 1 ; 1-20 (Depending on configuration) 1 ; 1,600 1 ; 16,000 1 ; 1-100 (depending upon configuration) 1 ; 18 1 ; 150,000 1 ; 15 1 ; 2,000 (Minimum capacity) 1 ; 3,000 1 ; 40 + 10 AT-PTs 1 ; 30-40 (Depending on configuration) 1 }",58,0,49
Cargo Capacity,object,"{ 65 kilograms 11 ; 150 kilograms 6 ; 100 metric tons 5 ; 500 metric tons 5 ; 45 kilograms 4 ; 200 metric tons 3 ; 100 kilograms 3 ; 1 metric ton 3 ; 6,000 metric tons 2 ; 80 kilograms 2 ; 400 metric tons 2 ; 20,000 metric tons 2 ; 300 metric tons 2 ; 15 metric tons (bomb bay) 2 ; 8,100 metric tons 2 ; 2,000 metric tons 2 ; 50 metric tons 2 ; 50 kilograms 2 ; 75 kilograms 2 ; 36,000 metric tons 2 ; 20 metric tons 1 ; 10,000 metric tons 1 ; 8,500 metric tons 1 ; 5,500 metric tons 1 ; 400,000 metric tons 1 ; 78,125-551,563 metric tons 1 ; 55 kilograms 1 ; (varies according to mission profile) 1 ; 15 metric tons (Mark 2 can hold 400 metric tons) 1 ; 100,000 metric tons 1 ; 5 metric tons 1 ; 0 or 1600 metric tons (Cargo transport configuration) 1 ; Mu-1 and Mu-2: 100 metric tons / Mu-3: 50 metric tons 1 ; 3,500 metric tons 1 ; 150 metric tons 1 ; 15 kilograms 1 ; 15 metric tons (troop compartment) 1 ; 17 + 1 metric tons (passenger pod + cargo space) 1 ; 25 kilograms 1 ; 4,050 metric tons 1 ; 0-80 metric tons (Depending on configuration) 1 ; 3,000 metric tons 1 ; 250 metric tons 1 ; 0-120 metric tons (Depending on configuration) 1 ; 1,500 metric tons 1 ; 110 kilograms 1 ; 60 metric tons (bomb bay) 1 ; 30 metric tons (bomb bay) 1 ; 30 metric tons 1 ; 40 kilograms 1 ; 180 metric tons 1 ; 500 kilograms 1 ; 15,000 metric tons 1 ; 500 kilograms (+ 17 metric tons if no troopers are carried) 1 ; 600,000 metric tons 1 ; 10,00 metric tons (Minimum capacity) 1 ; 45 (+ 100 if no passengers are carried) kilograms 1 ; 9,000 metric tons 1 ; 900 metric tons 1 ; 200 kilograms 1 }",104,0,3
Consumables,object,{ 2 days 25 ; 1 week 13 ; 2 months 10 ; 5 days 10 ; 1 day 9 ; 1 year 7 ; 3 months 4 ; 1 month 4 ; 6 years 3 ; 4 years 3 ; 2 years 3 ; 6 months 3 ; 3 years 2 ; 5 years 1 ; 1.5 years 1 ; Mu-1 and Mu-2: 6 months / Mu-3: 2 months 1 ; 5 months 1 ; 10 years 1 ; 4 days 1 ; 3 month 1 ; 8 months 1 ; 9 months 1 ; 1 months 1 ; 1.2 years 1 },107,0,0
Hyperdrive Multiplier,object,"{ x2 31 ; x1 23 ; x3 6 ; x4 2 ; x0.6 1 ; x1.5 1 ; x0.5 1 ; x1,5 1 ; none (Mark 2 has x1) 1 ; x3-x1 1 ; x0.55 1 }",69,0,38


Tiene 19 atributos y 107 ejemplos.




#Preparar Datos de Naves Imperiales

In [None]:
#@title Preparar Campos donde se saca texto

# Hace copia de trabajo (para romper datos extraidos)
ndf = df.copy()

# funciones auxiliares
def extraerPrimerNro(columnName):
  global ndf
  ndf[columnName] = ndf[columnName].str.extract('(\d)\D*', expand=True)
  #ndf[columnName] = ndf[columnName].str.replace('\D+', '', regex=True)
  return

def reemplazar(columnName, oldValue, newValue=""):
  global ndf
  ndf[columnName] = ndf[columnName].str.replace(oldValue, newValue, regex=False)
  ndf[columnName] = ndf[columnName].fillna(0)
  return

def completarVacios(columnName, newValue=-1):
  global ndf
  ndf[columnName] = ndf[columnName].fillna(newValue)
  return

def convertirFloat(columnName):
  global ndf
  ndf[columnName] = ndf[columnName].astype(float)

print("-procesa Length")
reemplazar("Length", " meters")
extraerPrimerNro("Length")
completarVacios("Length", 0)
convertirFloat("Length")

print("-procesa Crew")
extraerPrimerNro("Crew")
completarVacios("Crew", 0)
convertirFloat("Crew")

print("-procesa Troops")
extraerPrimerNro("Troops")
completarVacios("Troops", 0)
convertirFloat("Troops")

print("-procesa Hyperdrive Multiplier")
extraerPrimerNro("Hyperdrive Multiplier")
completarVacios("Hyperdrive Multiplier", -1)
convertirFloat("Hyperdrive Multiplier")

print("-procesa Hyperdrive Backup")
extraerPrimerNro("Hyperdrive Backup")
completarVacios("Hyperdrive Backup", -1)
convertirFloat("Hyperdrive Backup")

print("-procesa Speed")
reemplazar("Speed", " MGLT")
extraerPrimerNro("Speed")
completarVacios("Speed", 0)
convertirFloat("Speed")

print("-procesa Hull")
reemplazar("Hull", " RU")
extraerPrimerNro("Hull")
completarVacios("Hull", 0)
convertirFloat("Hull")

print("-procesa Shields")
reemplazar("Shields", " SBD")
extraerPrimerNro("Shields")
completarVacios("Shields", -1)
convertirFloat("Shields")

# muestra cambios
ndf.head()

-procesa Length
-procesa Crew
-procesa Troops
-procesa Hyperdrive Multiplier
-procesa Hyperdrive Backup
-procesa Speed
-procesa Hull
-procesa Shields


Unnamed: 0,Name,Ship Type,Model,Manufacturer,Length,Crew,Troops,Cargo Capacity,Consumables,Hyperdrive Multiplier,Hyperdrive Backup,Speed,Hull,Shields,Weapons,URL_data,URL_image,Special Features,Onboard Craft
0,Guardian Class Cruiser,Patrol Craft,Guardian Class Cruiser,Sienar Fleet Systems,4.0,2.0,1.0,200 metric tons,3 months,1.0,1.0,7.0,8.0,1.0,2 Laser Cannons and 2 Laser Cannon Turrets.,http://insd.swcombine.com/insd/guardian.htm,http://insd.swcombine.com/insd/Ship_files/GUARDIAN.JPG,,
1,TIE Aggressor,TIE Fighters,TIE/agg Fighter,Santhe / Sienar Technologies,7.0,2.0,0.0,150 kilograms,5 days,4.0,-1.0,9.0,1.0,2.0,"2 Medium Laser Cannons, 2 Concussion Missile Launchers and 1 Twin Blaster Cannon turret.",http://insd.swcombine.com/insd/tieaggre.htm,http://insd.swcombine.com/insd/Ship_files/TIEAGR.JPG,,
2,Kappa Class Landing Craft,Landing Craft,Kappa Class Landing Craft,Republic Sienar Systems,3.0,4.0,4.0,50 metric tons,1 month,1.0,1.0,7.0,2.0,6.0,1 Double Blaster Cannon Turret and 2 Double Blaster Cannons.,http://insd.swcombine.com/insd/kappa.htm,http://insd.swcombine.com/insd/Ship_files/Kappa.jpg,,
3,Death Star Defender,Other Starfighters,Death Star Defender,CPG Space Products,5.0,1.0,0.0,50 kilograms,1 week,-1.0,-1.0,1.0,7.0,1.0,2 Laser Cannons,http://insd.swcombine.com/insd/dsd.htm,http://insd.swcombine.com/insd/Ship_files/DSD.JPG,Adjustable Attack Wing design.,
4,Corellian CR90 Corvette,Medium Ships,Corellian CR90 Corvette,Corellian Engineering Corporation,1.0,3.0,1.0,"3,000 metric tons",1 year,2.0,-1.0,2.0,1.0,4.0,2 Double Turbolaser Cannons and 4 Turbolaser Cannons,http://insd.swcombine.com/insd/corvet.htm,http://insd.swcombine.com/insd/Ship_files/CORCORVE.JPG,,


In [None]:
#@title Preparar campo Cargo Capacity

print("-procesa Cargo Capacity (pasa todo a kilogramos)")

# realiza los cambios considerando tipo de métrica
auxOriList = list(df["Cargo Capacity"])
auxNewList = []
for val in auxOriList:
  #print(val)
  if (val is None) or (str(val) in ["nan", "(varies according to mission profile)"]):
    auxNewList.append( -1 )
  elif str(val)=="Mu-1 and Mu-2: 100 metric tons / Mu-3: 50 metric tons":
    auxNewList.append( 75 )
  else:
    val = val.replace(",", "")
    val = val.replace(" + 1", "")
    if "(" in val:
      posParent = val.index("(")
    else:
      posParent = -1
    if "-" in val:
      # calcula promedio
      posSep1 = val.index("-")
      posSep2 = val.index(" ")
      nval = ( float(val[0:posSep1]) + float(val[posSep1+1:posSep2]) ) / 2
      val = str(nval) + val[posSep2:]
    if "or" in val:
      # calcula promedio
      posSep1 = val.index(" or ")
      posSep2 = val[posSep1+4:].index(" ") + posSep1 + 4
      nval = ( float(val[0:posSep1]) + float(val[posSep1+4:posSep2]) ) / 2
      val = str(nval) + val[posSep2:]
    if "kilograms" in val:
      posAux = val.index("kilograms")
      if (posParent>0) and (posParent<posAux):
        posAux = posParent
      auxNewList.append( float(val[:posAux].strip()) )
    elif "metric ton" in val:
      posAux = val.index("metric ton")
      if (posParent>0) and (posParent<posAux):
        posAux = posParent
      auxNewList.append( float(val[:posAux].strip()) * 1000 )
    else:
      print("No se puede procesar: ", val)

# actualiza los datos
ndf["Cargo Capacity"] = auxNewList
convertirFloat("Cargo Capacity")


ndf.head()

-procesa Cargo Capacity (pasa todo a kilogramos)


Unnamed: 0,Name,Ship Type,Model,Manufacturer,Length,Crew,Troops,Cargo Capacity,Consumables,Hyperdrive Multiplier,Hyperdrive Backup,Speed,Hull,Shields,Weapons,URL_data,URL_image,Special Features,Onboard Craft
0,Guardian Class Cruiser,Patrol Craft,Guardian Class Cruiser,Sienar Fleet Systems,4.0,2.0,1.0,200000.0,3 months,1.0,1.0,7.0,8.0,1.0,2 Laser Cannons and 2 Laser Cannon Turrets.,http://insd.swcombine.com/insd/guardian.htm,http://insd.swcombine.com/insd/Ship_files/GUARDIAN.JPG,,
1,TIE Aggressor,TIE Fighters,TIE/agg Fighter,Santhe / Sienar Technologies,7.0,2.0,0.0,150.0,5 days,4.0,-1.0,9.0,1.0,2.0,"2 Medium Laser Cannons, 2 Concussion Missile Launchers and 1 Twin Blaster Cannon turret.",http://insd.swcombine.com/insd/tieaggre.htm,http://insd.swcombine.com/insd/Ship_files/TIEAGR.JPG,,
2,Kappa Class Landing Craft,Landing Craft,Kappa Class Landing Craft,Republic Sienar Systems,3.0,4.0,4.0,50000.0,1 month,1.0,1.0,7.0,2.0,6.0,1 Double Blaster Cannon Turret and 2 Double Blaster Cannons.,http://insd.swcombine.com/insd/kappa.htm,http://insd.swcombine.com/insd/Ship_files/Kappa.jpg,,
3,Death Star Defender,Other Starfighters,Death Star Defender,CPG Space Products,5.0,1.0,0.0,50.0,1 week,-1.0,-1.0,1.0,7.0,1.0,2 Laser Cannons,http://insd.swcombine.com/insd/dsd.htm,http://insd.swcombine.com/insd/Ship_files/DSD.JPG,Adjustable Attack Wing design.,
4,Corellian CR90 Corvette,Medium Ships,Corellian CR90 Corvette,Corellian Engineering Corporation,1.0,3.0,1.0,3000000.0,1 year,2.0,-1.0,2.0,1.0,4.0,2 Double Turbolaser Cannons and 4 Turbolaser Cannons,http://insd.swcombine.com/insd/corvet.htm,http://insd.swcombine.com/insd/Ship_files/CORCORVE.JPG,,


In [None]:
#@title Preparar campo Consumables

print("-procesa Consumables (pasa todo a meses)")

# realiza los cambios considerando tipo de métrica
auxOriList = list(df["Consumables"])
auxNewList = []
for val in auxOriList:
  #print(val)
  if (val is None) or (str(val) in ["nan", "(varies according to mission profile)"]):
    auxNewList.append( -1 )
  if str(val) == "Mu-1 and Mu-2: 6 months / Mu-3: 2 months":
    auxNewList.append( 4 )
  else:
    val = val.replace(",", "")
    #val = val.replace(" + 1", "")
    if "(" in val:
      posParent = val.index("(")
    else:
      posParent = -1
    if "-" in val:
      # calcula promedio
      posSep1 = val.index("-")
      posSep2 = val.index(" ")
      nval = ( float(val[0:posSep1]) + float(val[posSep1+1:posSep2]) ) / 2
      val = str(nval) + val[posSep2:]
    if "or" in val:
      # calcula promedio
      posSep1 = val.index(" or ")
      posSep2 = val[posSep1+4:].index(" ") + posSep1 + 4
      nval = ( float(val[0:posSep1]) + float(val[posSep1+4:posSep2]) ) / 2
      val = str(nval) + val[posSep2:]
    if "month" in val:
      posAux = val.index("month")
      if (posParent>0) and (posParent<posAux):
        posAux = posParent
      auxNewList.append( float(val[:posAux].strip()) )
    elif "year" in val:
      posAux = val.index("year")
      if (posParent>0) and (posParent<posAux):
        posAux = posParent
      auxNewList.append( float(val[:posAux].strip()) * 12 )
    elif "week" in val:
      posAux = val.index("week")
      if (posParent>0) and (posParent<posAux):
        posAux = posParent
      auxNewList.append( float(val[:posAux].strip()) / 4.5 )
    elif "day" in val:
      posAux = val.index("day")
      if (posParent>0) and (posParent<posAux):
        posAux = posParent
      auxNewList.append( float(val[:posAux].strip()) / 30 )
    else:
      print("No se puede procesar: ", val)

# actualiza los datos
ndf["Consumables"] = auxNewList
convertirFloat("Consumables")

ndf.head()

-procesa Consumables (pasa todo a meses)


Unnamed: 0,Name,Ship Type,Model,Manufacturer,Length,Crew,Troops,Cargo Capacity,Consumables,Hyperdrive Multiplier,Hyperdrive Backup,Speed,Hull,Shields,Weapons,URL_data,URL_image,Special Features,Onboard Craft
0,Guardian Class Cruiser,Patrol Craft,Guardian Class Cruiser,Sienar Fleet Systems,4.0,2.0,1.0,200000.0,3.0,1.0,1.0,7.0,8.0,1.0,2 Laser Cannons and 2 Laser Cannon Turrets.,http://insd.swcombine.com/insd/guardian.htm,http://insd.swcombine.com/insd/Ship_files/GUARDIAN.JPG,,
1,TIE Aggressor,TIE Fighters,TIE/agg Fighter,Santhe / Sienar Technologies,7.0,2.0,0.0,150.0,0.166667,4.0,-1.0,9.0,1.0,2.0,"2 Medium Laser Cannons, 2 Concussion Missile Launchers and 1 Twin Blaster Cannon turret.",http://insd.swcombine.com/insd/tieaggre.htm,http://insd.swcombine.com/insd/Ship_files/TIEAGR.JPG,,
2,Kappa Class Landing Craft,Landing Craft,Kappa Class Landing Craft,Republic Sienar Systems,3.0,4.0,4.0,50000.0,1.0,1.0,1.0,7.0,2.0,6.0,1 Double Blaster Cannon Turret and 2 Double Blaster Cannons.,http://insd.swcombine.com/insd/kappa.htm,http://insd.swcombine.com/insd/Ship_files/Kappa.jpg,,
3,Death Star Defender,Other Starfighters,Death Star Defender,CPG Space Products,5.0,1.0,0.0,50.0,0.222222,-1.0,-1.0,1.0,7.0,1.0,2 Laser Cannons,http://insd.swcombine.com/insd/dsd.htm,http://insd.swcombine.com/insd/Ship_files/DSD.JPG,Adjustable Attack Wing design.,
4,Corellian CR90 Corvette,Medium Ships,Corellian CR90 Corvette,Corellian Engineering Corporation,1.0,3.0,1.0,3000000.0,12.0,2.0,-1.0,2.0,1.0,4.0,2 Double Turbolaser Cannons and 4 Turbolaser Cannons,http://insd.swcombine.com/insd/corvet.htm,http://insd.swcombine.com/insd/Ship_files/CORCORVE.JPG,,


In [None]:
#@title Preparar campo Manufacturer

print("-procesa Manufacturer (asigna codigo valor ID)")

from sklearn.preprocessing import LabelEncoder

encoder = LabelEncoder().fit(df["Manufacturer"])
ndf["Manufacturer"] = encoder.transform(df["Manufacturer"])
convertirFloat("Manufacturer")

# muestra códificación asignada
print("\n\tCodificación asignada: ")
for i in range(len(encoder.classes_)):
  print("\t\t", i, ":", encoder.classes_[i])
print("")

ndf.head()

-procesa Manufacturer (asigna codigo valor ID)

	Codificación asignada: 
		 0 : Byss Worx / Imperial Department of Military Research
		 1 : CPG Space Products
		 2 : Corellian Engineering Corporation
		 3 : Cygnus Spaceworks
		 4 : Cygnus Spaceworks / Sienar Fleet Systems
		 5 : Damorian Manufacturing Corporation
		 6 : Imperial Shipyards
		 7 : Incom Corporation
		 8 : Kuat Drive Yards
		 9 : Loronar
		 10 : Loronar / Rendili StarDrive /Sienar Fleet Systems and Kuat Drive Yards
		 11 : Meller & Dax
		 12 : Mesens Corporation
		 13 : Rendili StarDrive
		 14 : Republic Sienar Systems
		 15 : Rothana Heavy Engineering
		 16 : Rothana Heavy Engineering / Kuat Drive Yards
		 17 : Santhe / Sienar Technologies
		 18 : Sienar Fleet Systems
		 19 : Sienar Fleet Systems / Chiss Ascendancy
		 20 : Sienar Fleet Systems / Imperial Department of Military Research
		 21 : Sienar Fleet Systems / Shobquix Yards
		 22 : Sienar Fleet Systems / Zsinj Development Incorporated
		 23 : Sienar Fleet Systems/

Unnamed: 0,Name,Ship Type,Model,Manufacturer,Length,Crew,Troops,Cargo Capacity,Consumables,Hyperdrive Multiplier,Hyperdrive Backup,Speed,Hull,Shields,Weapons,URL_data,URL_image,Special Features,Onboard Craft
0,Guardian Class Cruiser,Patrol Craft,Guardian Class Cruiser,18.0,4.0,2.0,1.0,200000.0,3.0,1.0,1.0,7.0,8.0,1.0,2 Laser Cannons and 2 Laser Cannon Turrets.,http://insd.swcombine.com/insd/guardian.htm,http://insd.swcombine.com/insd/Ship_files/GUARDIAN.JPG,,
1,TIE Aggressor,TIE Fighters,TIE/agg Fighter,17.0,7.0,2.0,0.0,150.0,0.166667,4.0,-1.0,9.0,1.0,2.0,"2 Medium Laser Cannons, 2 Concussion Missile Launchers and 1 Twin Blaster Cannon turret.",http://insd.swcombine.com/insd/tieaggre.htm,http://insd.swcombine.com/insd/Ship_files/TIEAGR.JPG,,
2,Kappa Class Landing Craft,Landing Craft,Kappa Class Landing Craft,14.0,3.0,4.0,4.0,50000.0,1.0,1.0,1.0,7.0,2.0,6.0,1 Double Blaster Cannon Turret and 2 Double Blaster Cannons.,http://insd.swcombine.com/insd/kappa.htm,http://insd.swcombine.com/insd/Ship_files/Kappa.jpg,,
3,Death Star Defender,Other Starfighters,Death Star Defender,1.0,5.0,1.0,0.0,50.0,0.222222,-1.0,-1.0,1.0,7.0,1.0,2 Laser Cannons,http://insd.swcombine.com/insd/dsd.htm,http://insd.swcombine.com/insd/Ship_files/DSD.JPG,Adjustable Attack Wing design.,
4,Corellian CR90 Corvette,Medium Ships,Corellian CR90 Corvette,2.0,1.0,3.0,1.0,3000000.0,12.0,2.0,-1.0,2.0,1.0,4.0,2 Double Turbolaser Cannons and 4 Turbolaser Cannons,http://insd.swcombine.com/insd/corvet.htm,http://insd.swcombine.com/insd/Ship_files/CORCORVE.JPG,,


In [None]:
#@title Preparar campo Weapons

print("-procesa Weapons (usando clustering)")

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score

def clusterizarDatosString(colunmnName, cantClusters=5):
  global ndf
  ndf[colunmnName] = ndf[colunmnName].fillna("-")
  documents  = list( ndf[colunmnName] )

  vectorizer = TfidfVectorizer(stop_words='english')
  X = vectorizer.fit_transform(documents)

  model = KMeans(n_clusters=cantClusters, init='k-means++', max_iter=100, n_init=1)
  ndf[colunmnName] = model.fit_transform(X)

clusterizarDatosString("Weapons")
convertirFloat("Weapons")

ndf.head()

-procesa Weapons (usando clustering)


Unnamed: 0,Name,Ship Type,Model,Manufacturer,Length,Crew,Troops,Cargo Capacity,Consumables,Hyperdrive Multiplier,Hyperdrive Backup,Speed,Hull,Shields,Weapons,URL_data,URL_image,Special Features,Onboard Craft
0,Guardian Class Cruiser,Patrol Craft,Guardian Class Cruiser,18.0,4.0,2.0,1.0,200000.0,3.0,1.0,1.0,7.0,8.0,1.0,0.598089,http://insd.swcombine.com/insd/guardian.htm,http://insd.swcombine.com/insd/Ship_files/GUARDIAN.JPG,,
1,TIE Aggressor,TIE Fighters,TIE/agg Fighter,17.0,7.0,2.0,0.0,150.0,0.166667,4.0,-1.0,9.0,1.0,2.0,0.988637,http://insd.swcombine.com/insd/tieaggre.htm,http://insd.swcombine.com/insd/Ship_files/TIEAGR.JPG,,
2,Kappa Class Landing Craft,Landing Craft,Kappa Class Landing Craft,14.0,3.0,4.0,4.0,50000.0,1.0,1.0,1.0,7.0,2.0,6.0,1.101076,http://insd.swcombine.com/insd/kappa.htm,http://insd.swcombine.com/insd/Ship_files/Kappa.jpg,,
3,Death Star Defender,Other Starfighters,Death Star Defender,1.0,5.0,1.0,0.0,50.0,0.222222,-1.0,-1.0,1.0,7.0,1.0,0.508596,http://insd.swcombine.com/insd/dsd.htm,http://insd.swcombine.com/insd/Ship_files/DSD.JPG,Adjustable Attack Wing design.,
4,Corellian CR90 Corvette,Medium Ships,Corellian CR90 Corvette,2.0,1.0,3.0,1.0,3000000.0,12.0,2.0,-1.0,2.0,1.0,4.0,1.063572,http://insd.swcombine.com/insd/corvet.htm,http://insd.swcombine.com/insd/Ship_files/CORCORVE.JPG,,


In [None]:
#@title Preparar campo Special Features

print("-procesa Special Features (usando clustering)")

clusterizarDatosString("Special Features")
convertirFloat("Special Features")

ndf.head()

-procesa Special Features (usando clustering)


Unnamed: 0,Name,Ship Type,Model,Manufacturer,Length,Crew,Troops,Cargo Capacity,Consumables,Hyperdrive Multiplier,Hyperdrive Backup,Speed,Hull,Shields,Weapons,URL_data,URL_image,Special Features,Onboard Craft
0,Guardian Class Cruiser,Patrol Craft,Guardian Class Cruiser,18.0,4.0,2.0,1.0,200000.0,3.0,1.0,1.0,7.0,8.0,1.0,0.598089,http://insd.swcombine.com/insd/guardian.htm,http://insd.swcombine.com/insd/Ship_files/GUARDIAN.JPG,0.038436,
1,TIE Aggressor,TIE Fighters,TIE/agg Fighter,17.0,7.0,2.0,0.0,150.0,0.166667,4.0,-1.0,9.0,1.0,2.0,0.988637,http://insd.swcombine.com/insd/tieaggre.htm,http://insd.swcombine.com/insd/Ship_files/TIEAGR.JPG,0.038436,
2,Kappa Class Landing Craft,Landing Craft,Kappa Class Landing Craft,14.0,3.0,4.0,4.0,50000.0,1.0,1.0,1.0,7.0,2.0,6.0,1.101076,http://insd.swcombine.com/insd/kappa.htm,http://insd.swcombine.com/insd/Ship_files/Kappa.jpg,0.038436,
3,Death Star Defender,Other Starfighters,Death Star Defender,1.0,5.0,1.0,0.0,50.0,0.222222,-1.0,-1.0,1.0,7.0,1.0,0.508596,http://insd.swcombine.com/insd/dsd.htm,http://insd.swcombine.com/insd/Ship_files/DSD.JPG,0.980457,
4,Corellian CR90 Corvette,Medium Ships,Corellian CR90 Corvette,2.0,1.0,3.0,1.0,3000000.0,12.0,2.0,-1.0,2.0,1.0,4.0,1.063572,http://insd.swcombine.com/insd/corvet.htm,http://insd.swcombine.com/insd/Ship_files/CORCORVE.JPG,0.038436,


In [None]:
#@title Preparar campo Onboard Craft

print("-procesa Onboard Craft (usando clustering)")

clusterizarDatosString("Onboard Craft")
convertirFloat("Onboard Craft")

ndf.head()

-procesa Onboard Craft (usando clustering)


Unnamed: 0,Name,Ship Type,Model,Manufacturer,Length,Crew,Troops,Cargo Capacity,Consumables,Hyperdrive Multiplier,Hyperdrive Backup,Speed,Hull,Shields,Weapons,URL_data,URL_image,Special Features,Onboard Craft
0,Guardian Class Cruiser,Patrol Craft,Guardian Class Cruiser,18.0,4.0,2.0,1.0,200000.0,3.0,1.0,1.0,7.0,8.0,1.0,0.598089,http://insd.swcombine.com/insd/guardian.htm,http://insd.swcombine.com/insd/Ship_files/GUARDIAN.JPG,0.038436,0.020437
1,TIE Aggressor,TIE Fighters,TIE/agg Fighter,17.0,7.0,2.0,0.0,150.0,0.166667,4.0,-1.0,9.0,1.0,2.0,0.988637,http://insd.swcombine.com/insd/tieaggre.htm,http://insd.swcombine.com/insd/Ship_files/TIEAGR.JPG,0.038436,0.020437
2,Kappa Class Landing Craft,Landing Craft,Kappa Class Landing Craft,14.0,3.0,4.0,4.0,50000.0,1.0,1.0,1.0,7.0,2.0,6.0,1.101076,http://insd.swcombine.com/insd/kappa.htm,http://insd.swcombine.com/insd/Ship_files/Kappa.jpg,0.038436,0.020437
3,Death Star Defender,Other Starfighters,Death Star Defender,1.0,5.0,1.0,0.0,50.0,0.222222,-1.0,-1.0,1.0,7.0,1.0,0.508596,http://insd.swcombine.com/insd/dsd.htm,http://insd.swcombine.com/insd/Ship_files/DSD.JPG,0.980457,0.020437
4,Corellian CR90 Corvette,Medium Ships,Corellian CR90 Corvette,2.0,1.0,3.0,1.0,3000000.0,12.0,2.0,-1.0,2.0,1.0,4.0,1.063572,http://insd.swcombine.com/insd/corvet.htm,http://insd.swcombine.com/insd/Ship_files/CORCORVE.JPG,0.038436,0.020437


In [None]:
#@title Eliminar campos Name, Model y URLs

ndf = ndf.drop(columns=["Name", "Model", "URL_data", "URL_image"])

ndf.head()

Unnamed: 0,Ship Type,Manufacturer,Length,Crew,Troops,Cargo Capacity,Consumables,Hyperdrive Multiplier,Hyperdrive Backup,Speed,Hull,Shields,Weapons,Special Features,Onboard Craft
0,Patrol Craft,18.0,4.0,2.0,1.0,200000.0,3.0,1.0,1.0,7.0,8.0,1.0,0.598089,0.038436,0.020437
1,TIE Fighters,17.0,7.0,2.0,0.0,150.0,0.166667,4.0,-1.0,9.0,1.0,2.0,0.988637,0.038436,0.020437
2,Landing Craft,14.0,3.0,4.0,4.0,50000.0,1.0,1.0,1.0,7.0,2.0,6.0,1.101076,0.038436,0.020437
3,Other Starfighters,1.0,5.0,1.0,0.0,50.0,0.222222,-1.0,-1.0,1.0,7.0,1.0,0.508596,0.980457,0.020437
4,Medium Ships,2.0,1.0,3.0,1.0,3000000.0,12.0,2.0,-1.0,2.0,1.0,4.0,1.063572,0.038436,0.020437


In [None]:
#@title Mostrar estadísticas de datos preparados

generar_estadisticas_detalladas(ndf, "> Estadísticas de los datos preparados")



 > Estadísticas de los datos preparados : 


Unnamed: 0,Tipo Valor,Rango Valores,Promedio ± Desvío,¿Valores Ceros?,¿Valores Nulos?
Ship Type,object,{ TIE Fighters 20 ; Medium Ships 15 ; Heavy Ships 14 ; Other Starfighters 10 ; TIE Support Craft 10 ; Patrol Craft 7 ; Landing Craft 7 ; Shuttles 6 ; TIE Bombers 6 ; TIE Experimental Craft 5 ; Transporters 4 ; Command Ships 3 },-,0,0
Manufacturer,float64,[ 0.00 ; 29.00 ],14.626 ± 7.205,1,0
Length,float64,[ 1.00 ; 9.00 ],4.065 ± 2.466,0,0
Crew,float64,[ 0.00 ; 9.00 ],2.495 ± 2.250,2,0
Troops,float64,[ 0.00 ; 9.00 ],1.738 ± 2.485,50,0
Cargo Capacity,float64,[ -1.00 ; 600000000.00 ],15213112.766 ± 75767701.080,0,0
Consumables,float64,[ 0.03 ; 120.00 ],8.328 ± 19.252,0,0
Hyperdrive Multiplier,float64,[ -1.00 ; 4.00 ],0.748 ± 1.461,3,0
Hyperdrive Backup,float64,[ -1.00 ; 8.00 ],0.682 ± 2.648,0,0
Speed,float64,[ 1.00 ; 9.00 ],4.617 ± 3.226,0,0


Tiene 15 atributos y 107 ejemplos.




#Exportar datos de Naves Imperiales

In [None]:
#@title Exporta los datos como CSV

# exporta datos originales
nomArch = "/content/navesOri.csv"
df.to_csv(nomArch, index=False)
files.download(nomArch)
print("Datos originales exportados como navesOri.csv")

nomArch = "/content/naves.csv"
ndf.to_csv(nomArch, index=False)
files.download(nomArch)
print("Datos preparados exportados como naves.csv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Datos originales exportados como navesOri.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Datos preparados exportados como naves.csv
