In [108]:
import pandas as pd
import requests
import re
from tqdm.auto import tqdm
import json
import fasttext
from collections import Counter
import numpy as np

In [3]:
# Enable tqdm for pandas
tqdm.pandas()

In [79]:
# Load overview of files
df = pd.read_csv('data/UMLFiles_List_V2.0.csv', sep=',')

# Initial data cleaning

In [80]:
# Get filetypes: we cannot use image files for scraping
df['filetype'] = df['Model Link - Github'].apply(lambda x: x.split('.')[-1])

# Check which filetypes are there
print(df['filetype'].unique())

# Remove rows with #VALUE! in it
df = df[df['Model Link - Github'] != '#VALUE!']

# Remove unusable file types
df = df[df['filetype'].isin(['xmi', 'uml'])]

# The dataframe contains GitHub URLs, not the "raw" files that we need
# Function to get the raw file URL from the GitHub URL
def get_raw_file(url):
    response = requests.get(url)
    data = response.text
    targets = re.findall(r'href="(.+/raw/.+?)"', data)
    
    try: 
        return 'https://github.com' + targets[0]
    except:
        # Page no longer exists, add tag to remove later
        return 'REMOVE'

# Do the harvesting 
df['raw_filepath'] = df['Model Link - Github'].progress_apply(lambda x: get_raw_file(x))

# Remove all (currently) non-existent file paths from the dataframe
df = df[df['raw_filepath'] != 'REMOVE']

['png' 'xmi' 'uml' 'jpg' 'svg' 'jpeg' 'gif' 'bmp' '#VALUE!']


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=35766.0), HTML(value='')))




In [4]:
# Save file for later use
df.to_csv('data/cleaned_files.csv', sep=',', index=False)

# Start wrangling

In [99]:
df = pd.read_csv('data/cleaned_files.csv', sep=',')

In [100]:
len(df)

30162

## UML file types

In [101]:
# UML file types
umls = df[df['filetype'] == 'uml']

# Saving regular expression patterns
attr_pattern_list = []
class_pattern_list = []

In [102]:
len(umls)

26872

## Defining patterns


### Programming-like class definition 

[In this file,](https://raw.githubusercontent.com/2ndchance/1400/master/projects/lionheart/docs/class.uml) classes are loosely defined as they would be in a programming language, as follows:

- class Unit
- class Crown
- class Knight
- class Archer
- class Infantry


In [6]:
class_pattern_list.append(r'\s[C|c]lass (\w+)')

### StarUML patterns

[In this file](https://raw.githubusercontent.com/ahmadpaudji/DTTI/master/Data%20Analisis/Model/UML/Relasiclass/RelasiClass.uml), classes are defined as types of XPD tags with the type UMLClassView, holding - among a lot of information about styling - the name in a NameLabel:

```
<XPD:OBJ name="OwnedViews[1]" type="UMLClassView" guid="v3U+HcJ2A0Spjyh+TK3TXwAA">
    <XPD:REF name="Model">kLKcecl2p0mgUzYZeLw6ugAA</XPD:REF>
    <XPD:OBJ name="NameCompartment" type="UMLNameCompartmentView" guid="mWqid1MIQkOogFZpBhhw4wAA">
        <XPD:OBJ name="NameLabel" type="LabelView" guid="8MeBm9Aod0ONFTy5oBebLwAA">
            <XPD:ATTR name="FontStyle" type="integer">1</XPD:ATTR>
            <XPD:ATTR name="Text" type="string">Restaurateur</XPD:ATTR>
        </XPD:OBJ>
        <XPD:OBJ name="StereotypeLabel" type="LabelView" guid="+JkN9MPr60W4Xzrm5xB/yAAA">
            <XPD:ATTR name="Visible" type="boolean">False</XPD:ATTR>
        </XPD:OBJ>
        <XPD:OBJ name="PropertyLabel" type="LabelView" guid="815dty7IP0CoNKguDRdc1gAA">
            <XPD:ATTR name="Visible" type="boolean">False</XPD:ATTR>
        </XPD:OBJ>
    </XPD:OBJ>
    <XPD:OBJ name="AttributeCompartment" type="UMLAttributeCompartmentView" guid="a/fEJW1c8UWMqIqW2WiKggAA">
        <XPD:REF name="Model">kLKcecl2p0mgUzYZeLw6ugAA</XPD:REF>
    </XPD:OBJ>
</XPD:OBJ>
```

Attributes have their own XPD tag with the type UMLAttribute:

```
<XPD:OBJ name="Attributes[0]" type="UMLAttribute" guid="X1GHr9/mo0qO3cVzMG4rygAA">
    <XPD:ATTR name="Name" type="string">idIngredient</XPD:ATTR>
    <XPD:REF name="Owner">vCJCqw/CaE6zNzyIYm3smwAA</XPD:REF>
</XPD:OBJ>
```

In [7]:
attr_pattern_list.append(r'XPD:OBJ .+?type="UMLAttribute" .+?type="string">(.+?)<\/XPD:ATTR>')
class_pattern_list.append(r'<XPD:OBJ .+? type="UMLClassView" .+? name="NameLabel" .+?XPD:ATTR name="Text".+?>(.+?)<\/XPD:ATTR>')

### Eclipse patterns

[In this file,](https://raw.githubusercontent.com/yuanheng1988/sedr/master/WebRoot/WEB-INF/classes/iscas/nfs/itechs/ese/servlets/servlets.uml) classes and attributes are defined following the OMG UML2 standard, as follows:

#### Class
```<packagedElement xmi:type="uml:Class" xmi:id="_Bkv94HeZEeCY6ocbkEuouA" name="SingleServlet">```

#### Attribute
```<ownedAttribute xmi:id="_G7hjY5X0EdyE2YnIO6QIiA" name="name" visibility="private" type="_GXK5JJX0EdyE2YnIO6QIiA">```

In [8]:
attr_pattern_list.append(r'<ownedAttribute.+?name="(\w+)"')
class_pattern_list.append(r'"uml:Class".+?name="(\w+)"')
class_pattern_list.append(r'name="(\w+)".+?"uml:Class"')

### Visual Studio patterns

[In this file,](https://raw.githubusercontent.com/aa8391093/LixuecongOnlineRepository/master/2015.7.14_UML_Learning_eg_class_time/2015.7.14_UML_Learning_eg_class_time/ModelDefinition/2015.7.14_UML_Learning_eg_class_time.uml) classes are defined as their own tags, having properties as subtags:

```
<class
    Id="1bcf7ec8-3157-477d-bbff-28c495793c6c"
    name="Time"
    isAbstract="false"
    isLeaf="false"
    isActiveClass="false">
    <ownedAttributesInternal>
      <property
        Id="98ba3fbd-3004-4f43-9e46-e0d026d5a0db"
        name="sec"
        visibility="Protected"
        isLeaf="false"
        isStatic="false"
        isReadOnly="false"
        isUnique="false"
        isDerived="false"
        isDerivedUnion="false"
        aggregation="None"
        isComposite="false" />
    </ownedAttributesInternal>
</class>
```

In [9]:
attr_pattern_list.append(r'<property .+?name="(\w+)"(?=.+?<\/ownedAttributesInternal>.+?<\/class>)')
class_pattern_list.append(r'<class.+?name="(\w+)"')

### Nodes and edges patterns

[In this file,](https://raw.githubusercontent.com/1berg/SimMA/master/Bilder/classes.uml) classes are defined as subproperties of the main ```classes``` object, and represented as a node: 

```
<nodes>
    <node x="285.0" y="343.5">classes.LightSensor</node>
    <node x="246.99999999999994" y="556.0">classes.Simulator</node>
    <node x="879.0" y="256.0">classes.Roboter</node>
    <node x="681.0" y="343.5">classes.TouchSensor</node>
    <node x="18.0" y="311.0">classes.Parcours</node>
    <node x="948.0" y="741.0">classes.Delay</node>
    <node x="0.0" y="741.0">classes.MotorPort</node>
    <node x="755.0" y="741.0">classes.UltrasonicSensor</node>
    <node x="547.0" y="741.0">classes.BildEinleser</node>
    <node x="394.921875" y="21.5">classes.SensorPort</node>
    <node x="0.0" y="0.0">classes.Leinwand</node>
    <node x="323.0" y="741.0">classes.Button</node>
    <node x="23.0" y="1123.0">classes.Motor</node>
    <node x="26.5" y="932.0">classes.NXTRegulatedMotor</node>
    <node x="483.0" y="343.5">classes.ColorSensor</node>
</nodes>
```

In [10]:
class_pattern_list.append(r'classes.(\w+)')

### XML UML tag patterns

[In this file,](https://raw.githubusercontent.com/arjunswaj/self-healing-water-networks/master/Documentation/UML/Class%20Diagram/class_diagram.uml) UML tags are used to define classes (```<UML:Class />```) and attributes (```<UML:Attribute />```): 

```
<UML:Class xmi.id = '-84--88--128-68--5c1f21aa:14811985826:-8000:0000000000000962' name = 'Source' visibility = 'public' isSpecification = 'false' isRoot = 'false' isLeaf = 'false' isAbstract = 'false' isActive = 'false'>
    <UML:Classifier.feature>
        <UML:Attribute xmi.id = '-84--88--128-68--5c1f21aa:14811985826:-8000:0000000000000963' name = 'type' visibility = 'public' isSpecification = 'false' ownerScope = 'instance' changeability = 'changeable' targetScope = 'instance'>
            <UML:StructuralFeature.multiplicity>
                <UML:Multiplicity xmi.id = '-84--88--128-68--5c1f21aa:14811985826:-8000:0000000000000964'>
                    <UML:Multiplicity.range>
                        <UML:MultiplicityRange xmi.id = '-84--88--128-68--5c1f21aa:14811985826:-8000:0000000000000965' lower = '1' upper = '1'/>
                    </UML:Multiplicity.range>
                </UML:Multiplicity>
            </UML:StructuralFeature.multiplicity>
            <UML:StructuralFeature.type>
                <UML:DataType href = 'http://argouml.org/profiles/uml14/default-uml14.xmi#-84-17--56-5-43645a83:11466542d86:-8000:000000000000087E'/>
            </UML:StructuralFeature.type>
        </UML:Attribute>
    </UML:Classifier.feature>
</UML:Class>
```

In [11]:
class_pattern_list.append(r'<UML:Class [^/]+?name ?= ?[\'|"]([\w\s]+?)[\'|"]')
attr_pattern_list.append(r'<UML:Attribute [^/]+?name ?= ?[\'|"]([\w\s]+?)[\'|"]')

## Search for patterns in files


In [12]:
flatten = lambda t: [item for sublist in t for item in sublist]

def get_classes_and_attributes_from_uml(uml):
    # Remove all line breaks and other "alternative" whitespace
    uml = uml.replace('\n', ' ').replace('\r', ' ').replace('\t', ' ')
    
    # Gather classes and attributes using defined patterns
    classes = flatten([re.findall(regex, uml) for regex in class_pattern_list])
    attributes = flatten([re.findall(regex, uml) for regex in attr_pattern_list])
    
    # Save classes and attributes in list if they hold values
    found_data = {}
    if len(classes) > 0:
        found_data['classes'] = list(dict.fromkeys(classes))
    if len(attributes) > 0:
        found_data['attributes'] = list(dict.fromkeys(attributes))
    
    return found_data

def get_file_text(url):
    # Get the file of the url
    response = requests.get(url)
    
    return response.text

def get_data_from_url(url):
    text = get_file_text(url)
    data = get_classes_and_attributes_from_uml(text)
    
    return data

In [13]:
get_data_from_url('https://raw.githubusercontent.com/ahmadpaudji/DTTI/master/Data%20Analisis/Model/UML/Relasiclass/RelasiClass.uml')

{'classes': ['Absen',
  'Model_absen',
  'Model_tambahan',
  'Model_izin',
  'Izin',
  'Model_kpi',
  'KPI',
  'Model_muhasabah',
  'Muhasabah',
  'Model_pegawai',
  'Pegawai'],
 'attributes': ['izin_rules array()',
  '$tambah_pegawai_rules',
  '$pegawai_rules']}

In [14]:
data_dict = {}

for file in tqdm(umls['raw_filepath'].tolist()):
    try:
        data = get_data_from_url(file)

        if len(data.keys()) > 0:
            data_dict[file] = data
    except:
        print(file)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=26872.0), HTML(value='')))

https://github.comhttps://github.com/Banana4Life/Exmatrikulation/blob/master/raw/class-diagram.uml
https://github.com/BeardAnnihilator/MagicTactil/raw/master/MagicTactilForWindows/MagicTactil/MagicTactilUML/ModelDefinition/MagicTactilUML.uml
https://github.com/BeardAnnihilator/MagicTactil/raw/master/MagicTactilForWindows/MagicTactil/MagicTactilUML/ModelDefinition/Package_1400.uml
https://github.com/BeardAnnihilator/MagicTactil/raw/master/MagicTactilForWindows/MagicTactil/MagicTactilUML/ModelDefinition/Package_1402.uml
https://github.com/BeardAnnihilator/MagicTactil/raw/master/MagicTactilForWindows/MagicTactil/MagicTactilUML/ModelDefinition/Package_1403.uml
https://github.com/BeardAnnihilator/MagicTactil/raw/master/MagicTactilForWindows/MagicTactil/MagicTactilUML/ModelDefinition/Package_1412.uml
https://github.com/BeardAnnihilator/MagicTactil/raw/master/MagicTactilForWindows/MagicTactil/MagicTactilUML/ModelDefinition/Package_1420.uml
https://github.com/BeardAnnihilator/MagicTactil/raw/m

https://github.com/bhanu550/FlyinTravel/raw/gh-pages/D4_State_Machines/StateMachine_Flight/StateMachine_Flight.uml
https://github.com/bhanu550/FlyinTravel/raw/gh-pages/D4_State_Machines/StateMachine_MemberPoints/StateMachine_MemberPoints.uml
https://github.com/biddyweb/Andromedia/raw/master/andromda-documentation/samples/timetracker/mda/src/main/uml2/andromda-common.profile.uml
https://github.com/biddyweb/Andromedia/raw/master/andromda-documentation/samples/timetracker/mda/src/main/uml2/andromda-persistence.profile.uml
https://github.com/biddyweb/Andromedia/raw/master/andromda-documentation/samples/timetracker/mda/src/main/uml2/andromda-presentation.profile.uml
https://github.com/biddyweb/Andromedia/raw/master/andromda-documentation/samples/timetracker/mda/src/main/uml2/andromda-service.profile.uml
https://github.com/biddyweb/Andromedia/raw/master/andromda-documentation/samples/timetracker/mda/src/main/uml2/andromda-webservice.profile.uml
https://github.com/biddyweb/Andromedia/raw/mast

https://github.com/BlackburnCollege/cs212-lab-sp16/raw/master/Documents/Lab05/coffeeshop-rekart-mueller.uml
https://github.com/BlackburnCollege/cs212-lab-sp16/raw/master/Documents/Lab05/coffeeshop-vanwormer.uml
https://github.com/BlackburnCollege/cs212-lab-sp16/raw/master/edu/blackburn/cs/cs212/bank/muellervanwormer/bank.uml
https://github.com/BlackburnCollege/cs212-lecture-sp16/raw/master/documents/bank.uml
https://github.com/BlackburnCollege/cs212-lecture-sp16/raw/master/documents/decoder-api-example..uml
https://github.com/BlackburnCollege/cs212-lecture-sp16/raw/master/documents/inheritance-examples-seat-bank.uml
https://github.com/BlackburnCollege/cs212-lecture-sp16/raw/master/documents/measurement-polymorphism.uml
https://github.com/blackgios/ZazilDWH/raw/master/librerias/Documentacion/Diagrama%20Data%20Warehouse.uml
https://github.com/BlackhoefStudios/BlackhoefStudios.Common/raw/master/Common.Modeling/ModelDefinition/Common.Modeling.uml
https://github.com/BlackhoefStudios/Blackho

https://github.com/BlueInt32/harmonizer/raw/master/Harmonizer.UML/ModelDefinition/Package2_2353.uml
https://github.com/coldcodecold/DeveloperSocialNetwork/raw/master/app/subservice/Trac-Collector/src/trac/collector-closs.uml
https://github.com/bmaggi/org.eclipse.papyrus.uml.simple/raw/master/org.eclipse.papyrus.uml.configuration.simplified/resources/templates/patternSingleton.uml
https://github.com/bmawji3/cs2340-MULE/raw/master/M3/cs2340.uml
https://github.com/BMX-Nick/Chunk/raw/master/Diagram.uml
https://github.com/bobo1993324/UDropCabin/raw/master/3rdParty/QtDropbox/doc/design.uml
https://github.com/boboo92/myooad/raw/master/model/ooad.uml
https://github.com/boggdan95/HojaDeTrabajo-10/raw/master/My.uml
https://github.com/boost-ext/di/raw/cpp14/doc/uml/coffee_maker.uml
https://github.com/boost-ext/di/raw/cpp14/doc/uml/di.uml
https://github.com/boost-ext/di/raw/cpp14/doc/uml/coffee_maker.uml
https://github.com/boost-ext/di/raw/cpp14/doc/uml/di.uml
https://github.com/Booster2/Booster2/

https://github.com/C204-242-DJSMT/Assignment-1/raw/master/DuncanWillcock/object%20model.uml
https://github.com/C204-242-DJSMT/Assignment-1/raw/master/Groupwork/unified%20class%20diagram.uml
https://github.com/cacafaca/polovni-automobili/raw/master/Dokumentacija/Polovni%20automobili.uml
https://github.com/cacafaca/polovni-automobili/raw/master/ModelingProject1/ModelDefinition/ModelingProject1.uml
https://github.com/cacafaca/polovni-automobili/raw/master/ModelingProject1/ModelDefinition/Package_0052.uml
https://github.com/CactaurJack/PersonalCode/raw/master/Digital%20Telephone%20Service_Project/DTSStart.uml
https://github.com/CactaurJack/PersonalCode/raw/master/Vending%20Machine/Class%20Diagram.uml
https://github.com/cadeteenlinea/cadeteenlinea_desktop/raw/master/DiagramadeComponentes/ModelDefinition/DiagramadeComponentes.uml
https://github.com/cadeteenlinea/cadeteenlinea_desktop/raw/master/DiagramadeComponentes/ModelDefinition/Package_1033.uml
https://github.com/cadeteenlinea/cadeteenli

https://github.com/CarlKlagba/jhipster-uml-editor/raw/master/www/lib/jhipster-uml/test/xmi/visualparadigm_enum_no_name_test.uml
https://github.com/CarlKlagba/jhipster-uml-editor/raw/master/www/lib/jhipster-uml/test/xmi/visualparadigm_enum_test.uml
https://github.com/CarlKlagba/jhipster-uml-editor/raw/master/www/lib/jhipster-uml/test/xmi/visualparadigm_no_attribute_name_test.uml
https://github.com/CarlKlagba/jhipster-uml-editor/raw/master/www/lib/jhipster-uml/test/xmi/visualparadigm_no_attribute_test.uml
https://github.com/CarlKlagba/jhipster-uml-editor/raw/master/www/lib/jhipster-uml/test/xmi/visualparadigm_no_class_name_test.uml
https://github.com/CarlKlagba/jhipster-uml-editor/raw/master/www/lib/jhipster-uml/test/xmi/visualparadigm_user_class_test.uml
https://github.com/CarlKlagba/jhipster-uml-editor/raw/master/www/lib/jhipster-uml/test/xmi/visualparadigm_wrong_typename.uml
https://github.com/carlos-olr/artigo-tdl-fatec-sjc/raw/master/LojaDeFlores/bin/resources/LojaFlor.uml
https://g

https://github.com/catompiler/celestial-battle/raw/master/celestial-battle.uml
https://github.com/caziertyler/CS3620_FinalProject/raw/master/fp-d3-diagram.uml
https://github.com/cbenoist/Slech/raw/master/SlechModel/ModelDefinition/SlechModel.uml
https://github.com/cbig/zupport/raw/master/zupport/docs/uml/TestClass.uml
https://github.com/cbrun/acceleo-launcher-examples/raw/master/uml-to-embedded-c/UMLProject/NonRegressionModel.uml
https://github.com/cbrun/emf-compare/raw/master/plugins/org.eclipse.emf.compare.mpatch.example/generalization/changed.uml
https://github.com/cbrun/emf-compare/raw/master/plugins/org.eclipse.emf.compare.mpatch.example/generalization/customer.uml
https://github.com/cbrun/emf-compare/raw/master/plugins/org.eclipse.emf.compare.mpatch.example/generalization/unchanged.uml
https://github.com/cbrun/emf-compare/raw/master/plugins/org.eclipse.emf.compare.mpatch.test/tests/uml/changed.uml
https://github.com/cbrun/emf-compare/raw/master/plugins/org.eclipse.emf.compare.mpa

https://github.com/jabba2324/RaaS/raw/master/RuntimeUML/ModelDefinition/Package6_1211.uml
https://github.com/jabba2324/RaaS/raw/master/RuntimeUML/ModelDefinition/Package7_1211.uml
https://github.com/jabba2324/RaaS/raw/master/RuntimeUML/ModelDefinition/Package8_1211.uml
https://github.com/jabba2324/RaaS/raw/master/RuntimeUML/ModelDefinition/RuntimeUML.uml
https://github.com/cdies/labs/raw/master/Media/MediaModelingDeagram/ModelDefinition/MediaModelingDeagram.uml
https://github.com/cdies/labs/raw/master/Media/MediaModelingDeagram/ModelDefinition/Package_0608.uml
https://github.com/cdies/labs/raw/master/Media/MediaModelingDeagram/ModelDefinition/Package_0612.uml
https://github.com/cdies/labs/raw/master/Media/MediaModelingDeagram/ModelDefinition/Package_0818.uml
https://github.com/cdies/labs/raw/master/Media/MediaModelingDeagram/ModelDefinition/Package_1240.uml
https://github.com/cdies/labs/raw/master/Media/MediaModelingDeagram/ModelDefinition/Package_1244.uml
https://github.com/cdies/labs

https://github.com/ChameleonChen/JFinalLearn/raw/master/UML/Untitled.uml
https://github.com/capptions/hotshot.js/raw/master/examples/aspnet/HotshotJsSample/ModelingProject1/ModelDefinition/ModelingProject1.uml
https://github.com/ChangeVision/astah-xmi-import-plugin/raw/master/src/test/resources/33.uml
https://github.com/changfeng777/PerformanceProfiler/raw/master/UML/PerformanceProfiler.uml
https://github.com/chanjettplus/Chanjet.TP/raw/master/src/Chanjet.TP.Modeling/ModelDefinition/Chanjet.TP.Modeling.uml
https://github.com/Charapao/ManualSuperdebug/raw/master/ManualMan.uml
https://github.com/charlesfire/OS-TP3-99/raw/master/ClassDiagram.uml
https://github.com/charlesfire/TP3-Crazy/raw/master/Uml.uml
https://github.com/charleslbryant/TestPipe/raw/master/documentation/Modeling/Modeling/ModelDefinition/Modeling.uml
https://github.com/charmesal/old.gets-removed/raw/master/SmartTankStation/ModelingProject1/ModelDefinition/ModelingProject1.uml
https://github.com/chear/NeuroView/raw/master/

https://github.com/cheyiliu/All-in-One/raw/master/res/cocos2d/cocos2d-x-3.3rc2-autoreleasepool.uml
https://github.com/cheyiliu/All-in-One/raw/master/res/cocos2d/cocos-airplain-war3.uml
https://github.com/cheyiliu/All-in-One/raw/master/res/vitamioRecorder.uml
https://github.com/chfoo/lyonlabs-org-mirror/raw/master/trunk/src/org/jbrain/qlink/connection/package.uml
https://github.com/chherbst/StudentManager/raw/master/EclipseProject/StudentManager/model/DataModel.uml
https://github.com/chibenwa/james-project/raw/master/src/site/resources/model-eclipse-modeler/model.uml
https://github.com/ChienHsiangLee/book/raw/master/bookstore_e6/bookstore.uml
https://github.com/ChienHsiangLee/example1/raw/master/bookstore_e6/bookstore.uml
https://github.com/ChienHsiangLee/FIrstSpringExample/raw/master/bookstore_e6/bookstore.uml
https://github.com/chillyistkult/Othello/raw/master/Classdiagram.uml
https://github.com/chinameepo/com_dengc_gradute_struts_learn/raw/master/astudentmgr/src/service/imp/testimp.u

https://github.com/chrupek/szkolkarz/raw/master/szkolkarzModelingProject/ModelDefinition/Package10_1731.uml
https://github.com/chrupek/szkolkarz/raw/master/szkolkarzModelingProject/ModelDefinition/Package11_1731.uml
https://github.com/chrupek/szkolkarz/raw/master/szkolkarzModelingProject/ModelDefinition/Package12_1731.uml
https://github.com/chrupek/szkolkarz/raw/master/szkolkarzModelingProject/ModelDefinition/Package13_1731.uml
https://github.com/chrupek/szkolkarz/raw/master/szkolkarzModelingProject/ModelDefinition/Package14_1731.uml
https://github.com/chrupek/szkolkarz/raw/master/szkolkarzModelingProject/ModelDefinition/Package15_1731.uml
https://github.com/chrupek/szkolkarz/raw/master/szkolkarzModelingProject/ModelDefinition/Package2_1731.uml
https://github.com/chrupek/szkolkarz/raw/master/szkolkarzModelingProject/ModelDefinition/Package4_1731.uml
https://github.com/chrupek/szkolkarz/raw/master/szkolkarzModelingProject/ModelDefinition/Package5_1731.uml
https://github.com/chrupek/szko

https://github.com/ClockWorkTeam/ClockWork/raw/master/Documenti/RQ/esterni/definizione_di_prodotto/img/server/dao/uml/UserDao.uml
https://github.com/ClockWorkTeam/ClockWork/raw/master/Documenti/RQ/esterni/definizione_di_prodotto/img/server/dao/uml/UserDaoSQL.uml
https://github.com/ClockWorkTeam/ClockWork/raw/master/Documenti/RQ/esterni/definizione_di_prodotto/img/server/functionmanager/uml/Converter.uml
https://github.com/ClockWorkTeam/ClockWork/raw/master/Documenti/RQ/esterni/definizione_di_prodotto/img/server/functionmanager/uml/packageFunctionmanager.uml
https://github.com/ClockWorkTeam/ClockWork/raw/master/Documenti/RQ/esterni/definizione_di_prodotto/img/server/shared/uml/packageShared.uml
https://github.com/ClockWorkTeam/ClockWork/raw/master/Documenti/RQ/esterni/definizione_di_prodotto/img/server/shared/uml/RecordMessage.uml
https://github.com/ClockWorkTeam/ClockWork/raw/master/Documenti/RQ/esterni/definizione_di_prodotto/img/server/shared/uml/Tutorials.uml
https://github.com/Cloc

https://github.com/evologica/generator-curio/raw/master/app/templates/mdl/model.uml
https://github.com/ConfuddledPenguin/Countdown/raw/master/Countdown.uml
https://github.com/ConfuddledPenguin/Folio-Tracker/raw/master/src/Folio%20Tracker.uml
https://github.com/connect2manu/test_helloworld_labs/raw/master/UMLModeling/Papyrus/model.uml
https://github.com/connect2manu/test_helloworld_labs/raw/master/UMLModeling/Papyrus/model2.uml
https://github.com/connect2manu/test_helloworld_labs/raw/master/UMLModeling/UMLDesigner/model.uml
https://github.com/connect2manu/test_helloworld_labs/raw/master/UMLModeling/UMLLab/UMLLab.uml
https://github.com/connect2manu/test_helloworld_labs/raw/master/UMLModeling/UMLLab/UMLLab2.uml
https://github.com/ControlSystemStudio/cs-studio/raw/master/applications/alarm/alarm-plugins/org.csstudio.alarm.beast.annunciator/doc/annunciator.uml
https://github.com/ControlSystemStudio/cs-studio/raw/master/applications/alarm/alarm-plugins/org.csstudio.alarm.beast.notifier/docs/

https://github.com/creckord/org.eclipse.uml2/raw/master/plugins/org.eclipse.uml2.uml.resources/profiles/UML2.profile.uml
https://github.com/creckord/org.eclipse.uml2/raw/master/plugins/org.eclipse.uml2.uml/model/Ecore.uml
https://github.com/creckord/org.eclipse.uml2/raw/master/plugins/org.eclipse.uml2.uml/model/UML.uml
https://github.com/CRISTELSoftware/CRISTEL-DHANULA/raw/master/ModelingProject1/ModelDefinition/ModelingProject1.uml
https://github.com/CRISTELSoftware/CRISTEL-DHANULA/raw/master/ModelingProject1/ModelDefinition/Package_2216.uml
https://github.com/cristianrosu/FoodCourt/raw/master/doc/yonder.1.0.uml
https://github.com/cristiantoader/fyp-pico/raw/master/PicoUserAuthenticator/PicoUserAuthenticator.uml
https://github.com/JosipRebrnjak/Java-bankingApplication-CollegeAssigment/raw/master/Banking%20application/lib/logback-1.0.13/docs/manual/images/chapters/appenders/appender.uml
https://github.com/JosipRebrnjak/Java-bankingApplication-CollegeAssigment/raw/master/Banking%20appli

https://github.com/csae1152/openmap/raw/master/src/openmap/com/bbn/openmap/omGraphics/awt/package.uml
https://github.com/CSalle/SE/raw/master/AeroStackApp/model.uml
https://github.com/CSalle/SE/raw/master/entrega_modelo/entrega_modelo.uml
https://github.com/CSalle/SE/raw/master/test/model.uml
https://github.com/cscfa/bartleby/raw/master/library/logBack/logback-1.1.3/logback-site/src/site/resources/manual/images/chapters/appenders/appender.uml
https://github.com/csddochi/dancing-links/raw/master/tetramino-3d/src/main/java/org/id/bjf/tetramino/My.uml
https://github.com/LajosCseppento/BME-NominateAndVote/raw/master/src/NominateAndVote/UML/ModelDefinition/Package3_1721.uml
https://github.com/LajosCseppento/BME-NominateAndVote/raw/master/src/NominateAndVote/UML/ModelDefinition/UML.uml
https://github.com/csiki/AnalogyRecovery/raw/master/docs/umldesign.uml
https://github.com/csiki/AntTerror/raw/master/docs/graftol.uml
https://github.com/csiki/ProjectWaifu/raw/master/doc/design.uml
https://git

https://github.com/demx8as6/02-MWTN-PoC/raw/master/models/20-reducedCoreModel-microwaveModel-test/uml/OnfModel-CoreModel/CoreModel.uml
https://github.com/demx8as6/02-MWTN-PoC/raw/master/models/20-reducedCoreModel-microwaveModel-test/uml/OpenModelProfile/OpenModel_Profile.profile.uml
https://github.com/demx8as6/02-MWTN-PoC/raw/master/models/ONF-CoreModel-1.1/uml/OnfModel-CoreModel/CoreModel.uml
https://github.com/demx8as6/02-MWTN-PoC/raw/master/models/ONF-CoreModel-1.1/uml/OpenModelProfile/OpenModel_Profile.profile.uml
https://github.com/demx8as6/02-MWTN-PoC/raw/master/models/tools/xmi2yang%20tool-v1.2/project/CoreModel.uml
https://github.com/demx8as6/02-MWTN-PoC/raw/master/models/tools/xmi2yang%20tool-v1.2/project/mwModel.uml
https://github.com/demylia/Tasks/raw/master/Task11_12_BinaryTree%26LINQ/AbstractFactory1/ModelDefinition/AbstractFactory1.uml
https://github.com/demylia/Tasks/raw/master/Task11_12_BinaryTree%26LINQ/AbstractFactory1/ModelDefinition/Package_1205.uml
https://github.c

https://github.com/devinh/KinectRCCar/raw/master/KinectRCCar.Modeling/ModelDefinition/KinectRCCar.Modeling.uml
https://github.com/devinh/KinectRCCar/raw/master/KinectRCCar.Modeling/ModelDefinition/Package_0013.uml
https://github.com/devinh/KinectRCCar/raw/master/KinectRCCar.Modeling/ModelDefinition/Package_1629.uml
https://github.com/devinh/KinectRCCar/raw/master/KinectRCCar.Modeling/ModelDefinition/Package_1736.uml
https://github.com/devinh/KinectRCCar/raw/master/KinectRCCar.Modeling/ModelDefinition/Package_1737.uml
https://github.com/devinh/KinectRCCar/raw/master/KinectRCCar.Modeling/ModelDefinition/Package_2012.uml
https://github.com/devinh/KinectRCCar/raw/master/KinectRCCar.Modeling/ModelDefinition/Package1_0013.uml
https://github.com/devinh/KinectRCCar/raw/master/KinectRCCar.Modeling/ModelDefinition/Package1_1736.uml
https://github.com/devinh/KinectRCCar/raw/master/KinectRCCar.Modeling/ModelDefinition/Package1_1737.uml
https://github.com/devinh/KinectRCCar/raw/master/KinectRCCar.M

https://github.com/DimitarGaydardzhiev/TelerikAcademy/raw/master/03.%20OOP/01.%20Defining-Classes-Part-1-Constructors-Properties/ModelingProject1/ModelDefinition/ModelingProject1.uml
https://github.com/DimitarGaydardzhiev/TelerikAcademy/raw/master/03.%20OOP/04.%20Object-Oriented-Programming-Principles-Part-1/School/ModelDefinition/School.uml
https://github.com/dinglevin/levin-learn/raw/master/levin-learn-ehcache/src/main/resources/design/ehcache.uml
https://github.com/dinglevin/levin-learn/raw/master/levin-learn-ehcache/src/main/resources/design/guava%20cache.uml
https://github.com/dingxinbei/Battery110Server/raw/master/UMLBatteryServer/ModelDefinition/UMLBatteryServer.uml
https://github.com/dinhbao10t4/hello-world/raw/master/abc.uml
https://github.com/diniodinev/SpringRSS/raw/master/src/main/resources/diagrams/CoreClasses.uml
https://github.com/dinkelstefan/Project-Qars-Windesheim/raw/master/Qars/Diagrams/ModelDefinition/Diagrams.uml
https://github.com/dinkelstefan/Project-Qars-Windes

https://github.com/DistributedSystemGroup4/Distributed-Taxi-Central/raw/master/_group4.termassignment/TaxiSystem.uml
https://github.com/DistributedSystemGroup4/Distributed-Taxi-Central/raw/master/_group4.termassignment/User.uml
https://github.com/DistributedSystemGroup4/Distributed-Taxi-Central/raw/master/_group4.termassignment/UserClient.uml
https://github.com/DistributedSystemGroup4/Distributed-Taxi-Central/raw/master/_group4.termassignment/UserMQTT.uml
https://github.com/DistributedSystemGroup4/Distributed-Taxi-Central/raw/master/group4.termassignment.taxisystem%20-%202014-05-04-1607/group4.termassignment.taxisystem/LatLongToAddressConverter.uml
https://github.com/DistributedSystemGroup4/Distributed-Taxi-Central/raw/master/group4.termassignment.taxisystem%20-%202014-05-04-1607/group4.termassignment.taxisystem/TaxiClient.uml
https://github.com/DistributedSystemGroup4/Distributed-Taxi-Central/raw/master/group4.termassignment.taxisystem%20-%202014-05-04-1607/group4.termassignment.taxis

https://github.com/dorbacao/frotas/raw/master/backend/Fleet.WebApi/Arquitetura/ModelDefinition/Arquitetura.uml
https://github.com/DormitoryTeam/Dormitory/raw/master/doc/uml/dormitory.uml
https://github.com/Dotneteer/SeempleStart/raw/master/SeemplestCloud/Models/SeemplestCloud.Models/ModelDefinition/SeemplestCloud.Models.uml
https://github.com/doubleirish/lecture2_jpa_Intro/raw/master/src/main/java/edu/uw/data/lecture2/model/uml_user.uml
https://github.com/DouglasSI/survey-tcc/raw/master/Requisitos/caso%20de%20uso%20-%20MySurvey.uml
https://github.com/dowe/bettenverwaltung/raw/master/UML_Model/ModelDefinition/Package_1424.uml
https://github.com/drasive/docxes/raw/master/src/Docxes.Model/ModelDefinition/Package_2229.uml
https://github.com/drasive/docxes/raw/master/src/Docxes.Model/ModelDefinition/Package_2233.uml
https://github.com/drasive/docxes/raw/master/src/Docxes.Model/ModelDefinition/Package_2239.uml
https://github.com/drasive/docxes/raw/master/src/Docxes.Model/ModelDefinition/Pack

https://github.com/dresden-ocl/legacy/raw/master/ocl20forEclipse/eclipse/tudresden.ocl20.pivot.examples.royalandloyal/model/royalsandloyals.uml
https://github.com/dresden-ocl/legacy/raw/master/ocl20forEclipse/eclipse/tudresden.ocl20.pivot.examples.simple/model/simple.uml
https://github.com/dresden-ocl/legacy/raw/master/ocl20forEclipse/eclipse/tudresden.ocl20.pivot.examples.university/model/university.uml
https://github.com/dresden-ocl/legacy/raw/master/ocl20forEclipse/eclipse/tudresden.ocl20.pivot.examples.university/model/university_complex.uml
https://github.com/dresden-ocl/legacy/raw/master/ocl20forEclipse/experimental/org.dresdenocl.examples.uml/modelinstances/SequenceDIagram.uml
https://github.com/dresden-ocl/legacy/raw/master/ocl20forEclipse/experimental/org.dresdenocl.util.deft.example/metamodel/uml.uml
https://github.com/dresden-ocl/legacy/raw/master/ocl20forEclipse/othertools/tudresden.ocl20.benchmark.sql/model/car.uml
https://github.com/dresden-ocl/legacy/raw/master/ocl20forE

https://github.com/dsg-unipr/patrol/raw/master/uml/P2PGame.uml
https://github.com/dsg-unipr/patrol/raw/master/uml/receiveInfoReq.uml
https://github.com/dsg-unipr/patrol/raw/master/uml/receiveReqMove.uml
https://github.com/dsp56001/CPP2014/raw/master/cppDemos/ModelingProjectDuck/ModelDefinition/ModelingProjectDuck.uml
https://github.com/dstrueber/splittr/raw/master/de.uni_marburg.splittr.evaluation/input/gmf/model/model.uml
https://github.com/dstrueber/splittr/raw/master/de.uni_marburg.splittr.evaluation/input/medical/model/model.uml
https://github.com/dstrueber/splittr/raw/master/de.uni_marburg.splittr.evaluation/input/pruneduml/model/model.uml
https://github.com/dstrueber/splittr/raw/master/de.uni_marburg.splittr.evaluation/input/r2ml/model/model.uml
https://github.com/dstrueber/splittr/raw/master/de.uni_marburg.splittr.evaluation/input/uml/model/model.uml
https://github.com/dstrueber/splittr/raw/master/de.uni_marburg.splittr.evaluation/input/wasl/model/model.uml
https://github.com/ds

https://github.com/eclipse/emf.compare/raw/master/performance/org.eclipse.emf.compare.tests.performance/src/data/models/model_size_large/modified/package_0_38.uml
https://github.com/eclipse/emf.compare/raw/master/performance/org.eclipse.emf.compare.tests.performance/src/data/models/model_size_large/modified/package_0_39.uml
https://github.com/eclipse/emf.compare/raw/master/performance/org.eclipse.emf.compare.tests.performance/src/data/models/model_size_large/modified/package_0_4.uml
https://github.com/eclipse/emf.compare/raw/master/performance/org.eclipse.emf.compare.tests.performance/src/data/models/model_size_large/modified/package_0_40.uml
https://github.com/eclipse/emf.compare/raw/master/performance/org.eclipse.emf.compare.tests.performance/src/data/models/model_size_large/modified/package_0_41.uml
https://github.com/eclipse/emf.compare/raw/master/performance/org.eclipse.emf.compare.tests.performance/src/data/models/model_size_large/modified/package_0_42.uml
https://github.com/ecli

https://github.com/eclipse/emf.compare/raw/master/performance/org.eclipse.emf.compare.tests.performance/src/data/models/model_size_large/modified/package_4_14.uml
https://github.com/eclipse/emf.compare/raw/master/performance/org.eclipse.emf.compare.tests.performance/src/data/models/model_size_large/modified/package_4_15.uml
https://github.com/eclipse/emf.compare/raw/master/performance/org.eclipse.emf.compare.tests.performance/src/data/models/model_size_large/modified/package_4_16.uml
https://github.com/eclipse/emf.compare/raw/master/performance/org.eclipse.emf.compare.tests.performance/src/data/models/model_size_large/modified/package_4_17.uml
https://github.com/eclipse/emf.compare/raw/master/performance/org.eclipse.emf.compare.tests.performance/src/data/models/model_size_large/modified/package_4_18.uml
https://github.com/eclipse/emf.compare/raw/master/performance/org.eclipse.emf.compare.tests.performance/src/data/models/model_size_large/modified/package_4_19.uml
https://github.com/ecl

https://github.com/eclipse/emf.compare/raw/master/performance/org.eclipse.emf.compare.tests.performance/src/data/models/model_size_large/modified/package_4_88.uml
https://github.com/eclipse/emf.compare/raw/master/performance/org.eclipse.emf.compare.tests.performance/src/data/models/model_size_large/modified/package_4_89.uml
https://github.com/eclipse/emf.compare/raw/master/performance/org.eclipse.emf.compare.tests.performance/src/data/models/model_size_large/modified/package_4_9.uml
https://github.com/eclipse/emf.compare/raw/master/performance/org.eclipse.emf.compare.tests.performance/src/data/models/model_size_large/modified/package_4_90.uml
https://github.com/eclipse/emf.compare/raw/master/performance/org.eclipse.emf.compare.tests.performance/src/data/models/model_size_large/modified/package_4_91.uml
https://github.com/eclipse/emf.compare/raw/master/performance/org.eclipse.emf.compare.tests.performance/src/data/models/model_size_large/modified/package_4_92.uml
https://github.com/ecli

https://github.com/eclipse/emf.compare/raw/master/performance/org.eclipse.emf.compare.tests.performance/src/data/models/model_size_large/modified/package_5_60.uml
https://github.com/eclipse/emf.compare/raw/master/performance/org.eclipse.emf.compare.tests.performance/src/data/models/model_size_large/modified/package_5_61.uml
https://github.com/eclipse/emf.compare/raw/master/performance/org.eclipse.emf.compare.tests.performance/src/data/models/model_size_large/modified/package_5_62.uml
https://github.com/eclipse/emf.compare/raw/master/performance/org.eclipse.emf.compare.tests.performance/src/data/models/model_size_large/modified/package_5_63.uml
https://github.com/eclipse/emf.compare/raw/master/performance/org.eclipse.emf.compare.tests.performance/src/data/models/model_size_large/modified/package_5_64.uml
https://github.com/eclipse/emf.compare/raw/master/performance/org.eclipse.emf.compare.tests.performance/src/data/models/model_size_large/modified/package_5_65.uml
https://github.com/ecl

https://github.com/eclipse/emf.compare/raw/master/performance/org.eclipse.emf.compare.tests.performance/src/data/models/model_size_large/original/package_3_75.uml
https://github.com/eclipse/emf.compare/raw/master/performance/org.eclipse.emf.compare.tests.performance/src/data/models/model_size_large/original/package_3_76.uml
https://github.com/eclipse/emf.compare/raw/master/performance/org.eclipse.emf.compare.tests.performance/src/data/models/model_size_large/original/package_3_77.uml
https://github.com/eclipse/emf.compare/raw/master/performance/org.eclipse.emf.compare.tests.performance/src/data/models/model_size_large/original/package_3_78.uml
https://github.com/eclipse/emf.compare/raw/master/performance/org.eclipse.emf.compare.tests.performance/src/data/models/model_size_large/original/package_3_79.uml
https://github.com/eclipse/emf.compare/raw/master/performance/org.eclipse.emf.compare.tests.performance/src/data/models/model_size_large/original/package_3_8.uml
https://github.com/ecli

https://github.com/eclipse/emf.compare/raw/master/performance/org.eclipse.emf.compare.tests.performance/src/data/models/model_size_large/original/package_4_57.uml
https://github.com/eclipse/emf.compare/raw/master/performance/org.eclipse.emf.compare.tests.performance/src/data/models/model_size_large/original/package_4_58.uml
https://github.com/eclipse/emf.compare/raw/master/performance/org.eclipse.emf.compare.tests.performance/src/data/models/model_size_large/original/package_4_59.uml
https://github.com/eclipse/emf.compare/raw/master/performance/org.eclipse.emf.compare.tests.performance/src/data/models/model_size_large/original/package_4_6.uml
https://github.com/eclipse/emf.compare/raw/master/performance/org.eclipse.emf.compare.tests.performance/src/data/models/model_size_large/original/package_4_60.uml
https://github.com/eclipse/emf.compare/raw/master/performance/org.eclipse.emf.compare.tests.performance/src/data/models/model_size_large/original/package_4_61.uml
https://github.com/ecli

https://github.com/eclipse/emf.compare/raw/master/performance/org.eclipse.emf.compare.tests.performance/src/data/models/model_size_large/original/package_5_34.uml
https://github.com/eclipse/emf.compare/raw/master/performance/org.eclipse.emf.compare.tests.performance/src/data/models/model_size_large/original/package_5_35.uml
https://github.com/eclipse/emf.compare/raw/master/performance/org.eclipse.emf.compare.tests.performance/src/data/models/model_size_large/original/package_5_36.uml
https://github.com/eclipse/emf.compare/raw/master/performance/org.eclipse.emf.compare.tests.performance/src/data/models/model_size_large/original/package_5_37.uml
https://github.com/eclipse/emf.compare/raw/master/performance/org.eclipse.emf.compare.tests.performance/src/data/models/model_size_large/original/package_5_38.uml
https://github.com/eclipse/emf.compare/raw/master/performance/org.eclipse.emf.compare.tests.performance/src/data/models/model_size_large/original/package_5_39.uml
https://github.com/ecl

https://github.com/geovannesdi/residential-life/raw/master/AnaliseProjeto/Casos%20de%20uso%20RL.uml
https://github.com/gmcfall/semantictools-tutorial/raw/master/src/main/resources/uml/lis.uml
https://github.com/gmcfall/semantictools/raw/master/examples/lti/src/main/resources/uml/LTI_v2.uml
https://github.com/GRA-UML/tool/raw/master/plugins/org.ijis.gra.uml.library/target/classes/uml/NIEM-Reference/NIEM-Reference-common-fbi.uml
https://github.com/GRA-UML/tool/raw/master/plugins/org.ijis.gra.uml.library/target/classes/uml/NIEM-Reference/NIEM-Reference-common-fips_10-4.uml
https://github.com/Hatsen/Hovedopave/raw/master/HovedOpgave/ModelingProjectClient/ModelDefinition/Package6_1641.uml
https://github.com/Hatsen/Hovedopave/raw/master/HovedOpgave/ModelingProjectClient/ModelDefinition/Package7_1641.uml
https://github.com/Hatsen/Hovedopave/raw/master/HovedOpgave/UMLHovedOpgave/ModelDefinition/UMLHovedOpgave.uml
https://github.com/HATtrick-games/ICT309/raw/master/Documentation/uml.uml
https:/

https://github.com/HengCao/oslc/raw/master/PapyrusProvider/UMLsource/model.uml
https://github.com/henrihs/ALeTrainSystem/raw/master/Calibration.uml
https://github.com/henrihs/ALeTrainSystem/raw/master/ColorSensor.uml
https://github.com/henrihs/ALeTrainSystem/raw/master/ColorSensorTest.uml
https://github.com/henrihs/ALeTrainSystem/raw/master/ColorSensorThreaded.uml
https://github.com/henrihs/ALeTrainSystem/raw/master/DoorController.uml
https://github.com/henrihs/ALeTrainSystem/raw/master/EV3.uml
https://github.com/henrihs/ALeTrainSystem/raw/master/FakeTrainHardware.uml
https://github.com/henrihs/ALeTrainSystem/raw/master/HeadState.uml
https://github.com/henrihs/ALeTrainSystem/raw/master/LockCoordinator.uml
https://github.com/henrihs/ALeTrainSystem/raw/master/LockHandler.uml
https://github.com/henrihs/ALeTrainSystem/raw/master/LockParticipant.uml
https://github.com/henrihs/ALeTrainSystem/raw/master/MapController.uml
https://github.com/henrihs/ALeTrainSystem/raw/master/MeasureTimeMicros.u

https://github.com/Holmewi/1dv437-project/raw/master/Hypothermia/GameClassDiagram/ModelDefinition/Package1_1445.uml
https://github.com/Holmewi/1dv437-project/raw/master/Hypothermia/GameClassDiagram/ModelDefinition/Package1_1533.uml
https://github.com/Holmewi/1dv437-project/raw/master/Hypothermia/GameClassDiagram/ModelDefinition/Package1_1622.uml
https://github.com/Holmewi/1dv437-project/raw/master/Hypothermia/GameClassDiagram/ModelDefinition/Package1_2357.uml
https://github.com/Holmewi/1dv437-project/raw/master/Hypothermia/GameClassDiagram/ModelDefinition/Package2_1533.uml
https://github.com/Holmewi/1dv607-jh222qr-workshops/raw/master/2-design/MemberRegistry/ModelingProject/ModelDefinition/ModelingProject.uml
https://github.com/Holmewi/1dv607-jh222qr-workshops/raw/master/2-design/MemberRegistry/ModelingProject/ModelDefinition/Package_1410.uml
https://github.com/Holmewi/1dv607-jh222qr-workshops/raw/master/2-design/MemberRegistry/ModelingProject/ModelDefinition/Package2_1410.uml
https://

https://github.com/ichigotake/Mikuregator-Aggregator/raw/master/src/main/scala/net/ichigotake/mikuregator/aggregator/mikuregator.uml
https://github.com/ichortechnology/ScreenVc/raw/master/LoadInvestorIndustries/LoadInvestorIndustries.Modeling/ModelDefinition/LoadInvestorIndustries.Model.uml
https://github.com/ichortechnology/ScreenVc/raw/master/LoadInvestorIndustries/LoadInvestorIndustries.Modeling/ModelDefinition/Package_1424.uml
https://github.com/ichortechnology/ScreenVc/raw/master/LoadInvestorIndustries/LoadInvestorIndustries.Modeling/ModelDefinition/Package_1909.uml
https://github.com/ickby/FreeCAD_constraint/raw/master/src/Doc/FreeCAD.uml
https://github.com/ickby/FreeCAD_constraint/raw/master/src/Doc/PythonClassModel.uml
https://github.com/iclodoaldo/JavaWeb/raw/master/banco_laboratorio.uml
https://github.com/ICT-Heroes/Hyperion/raw/master/src/view/hyperion/model.uml
https://github.com/idavydko/DBPGroup/raw/master/DBP.Infrastructure/ModelDefinition/DBP.Infrastructure.uml
https://

https://github.com/iluvwnba/GymImprover/raw/master/GymImprover/Project%20Diagram/ModelDefinition/Package1_0018.uml
https://github.com/iluvwnba/GymImprover/raw/master/GymImprover/Project%20Diagram/ModelDefinition/Package10_0018.uml
https://github.com/iluvwnba/GymImprover/raw/master/GymImprover/Project%20Diagram/ModelDefinition/Package11_0018.uml
https://github.com/iluvwnba/GymImprover/raw/master/GymImprover/Project%20Diagram/ModelDefinition/Package12_0018.uml
https://github.com/iluvwnba/GymImprover/raw/master/GymImprover/Project%20Diagram/ModelDefinition/Package13_0018.uml
https://github.com/iluvwnba/GymImprover/raw/master/GymImprover/Project%20Diagram/ModelDefinition/Package14_0018.uml
https://github.com/iluvwnba/GymImprover/raw/master/GymImprover/Project%20Diagram/ModelDefinition/Package15_0018.uml
https://github.com/iluvwnba/GymImprover/raw/master/GymImprover/Project%20Diagram/ModelDefinition/Package2_0018.uml
https://github.com/iluvwnba/GymImprover/raw/master/GymImprover/Project%20D

https://github.com/info-sharing-environment/NIEM-Modeling-Tool/raw/master/plugins/org.search.niem.uml.library/src/main/resources/uml/NIEM-Reference/NIEM-Reference-common-lasd.uml
https://github.com/info-sharing-environment/NIEM-Modeling-Tool/raw/master/plugins/org.search.niem.uml.library/src/main/resources/uml/NIEM-Reference/NIEM-Reference-common-mmucc_2.uml
https://github.com/info-sharing-environment/NIEM-Modeling-Tool/raw/master/plugins/org.search.niem.uml.library/src/main/resources/uml/NIEM-Reference/NIEM-Reference-common-mn_offense.uml
https://github.com/info-sharing-environment/NIEM-Modeling-Tool/raw/master/plugins/org.search.niem.uml.library/src/main/resources/uml/NIEM-Reference/NIEM-Reference-common-nga.uml
https://github.com/info-sharing-environment/NIEM-Modeling-Tool/raw/master/plugins/org.search.niem.uml.library/src/main/resources/uml/NIEM-Reference/NIEM-Reference-common-nlets.uml
https://github.com/info-sharing-environment/NIEM-Modeling-Tool/raw/master/plugins/org.search.nie

https://github.com/inved1/ch.bfh.bti7301q.monopoly/raw/master/monopoly.doc/ModelDefinition/Package3_2122.uml
https://github.com/inved1/ch.bfh.bti7301q.monopoly/raw/master/monopoly.doc/ModelDefinition/Package4_0936.uml
https://github.com/inved1/ch.bfh.bti7301q.monopoly/raw/master/monopoly.doc/ModelDefinition/Package4_1829.uml
https://github.com/inved1/ch.bfh.bti7301q.monopoly/raw/master/monopoly.doc/ModelDefinition/Package4_2122.uml
https://github.com/inved1/ch.bfh.bti7301q.monopoly/raw/master/monopoly.doc/ModelDefinition/Package5_0936.uml
https://github.com/inved1/ch.bfh.bti7301q.monopoly/raw/master/monopoly.doc/ModelDefinition/Package5_1829.uml
https://github.com/inved1/ch.bfh.bti7301q.monopoly/raw/master/monopoly.doc/ModelDefinition/Package5_2122.uml
https://github.com/inved1/ch.bfh.bti7301q.monopoly/raw/master/monopoly.doc/ModelDefinition/Package6_1829.uml
https://github.com/inved1/ch.bfh.bti7301q.monopoly/raw/master/monopoly.doc/ModelDefinition/Package6_2122.uml
https://github.com/

https://github.com/ismalote/SistemaCocheras/raw/master/uml/tpoIOO%20v1.3.uml
https://github.com/it-workshop/UniSched/raw/master/uml/UniSched.uml
https://github.com/itaiezra/Project/raw/master/KeyWord_Extractor/ModelingProject1/ModelDefinition/ModelingProject1.uml
https://github.com/itaiezra/Project/raw/master/KeyWord_Extractor/ModelingProject1/ModelDefinition/Package_1704.uml
https://github.com/itaiezra/Project/raw/master/KeyWord_Extractor/ModelingProject1/ModelDefinition/Package_1705.uml
https://github.com/itaiezra/Project/raw/master/KeyWord_Extractor/ModelingProject1/ModelDefinition/Package1_1704.uml
https://github.com/itaiezra/Project/raw/master/KeyWord_Extractor/ModelingProject1/ModelDefinition/Package1_1705.uml
https://github.com/itaiezra/Project/raw/master/KeyWord_Extractor/ModelingProject1/ModelDefinition/Package2_1704.uml
https://github.com/itaiezra/Project/raw/master/KeyWord_Extractor/ModelingProject1/ModelDefinition/Package3_1704.uml
https://github.com/iTEEteam/Dokumentaciok/

https://github.com/iwantc00kies/TrouverSaRoute/raw/master/uml/modelisation.uml
https://github.com/izontm/SML/raw/master/br.ufes.inf.ctx/model/ctx.profile.uml
https://github.com/izontm/SML/raw/master/br.ufes.inf.sml.example/model/example.uml
https://github.com/izontm/SML/raw/master/br.ufes.inf.sml.example/model/my.uml
https://github.com/jackandking/TickForecast/raw/master/TickForecast_Realtime.uml
https://github.com/jackball2008/ACW/raw/master/C%2B%2B%26Graphics_UML/C%2B%2B_Graphics_ACW.uml
https://github.com/jackball2008/ACW/raw/master/C%2B%2B%26Graphics_UML/C%2B%2B_Graphics_Utilities_ACW.uml
https://github.com/Jackie-Innover/3VR/raw/master/Quantum%20Tape/Tape%20Simulator/src/TarDiagram/TarDiagram/ModelDefinition/TarDiagram.uml
https://github.com/jackjackrene/SAD_Pi/raw/master/Production/Src/GUIModeling/ModelDefinition/GUIModeling.uml
https://github.com/jackjackrene/SAD_Pi/raw/master/Production/Src/GUIModeling/ModelDefinition/Package_1710.uml
https://github.com/jackjackrene/SAD_Pi/raw/

https://github.com/jagalindo/puzzle-testing/raw/master/downloads/REFMVPC/MVSC/GPLSPLOT/MSTPrim/MSTPrim.uml
https://github.com/jagalindo/puzzle-testing/raw/master/downloads/REFMVPC/MVSC/GPLSPLOT/Number/Number.uml
https://github.com/jagalindo/puzzle-testing/raw/master/downloads/REFMVPC/MVSC/GPLSPLOT/StronglyConnected/StronglyConnected.uml
https://github.com/jagalindo/puzzle-testing/raw/master/downloads/REFMVPC/MVSC/GPLSPLOT/TestProg/TestProg.uml
https://github.com/jagalindo/puzzle-testing/raw/master/downloads/REFMVPC/MVSC/GPLSPLOT/Transpose/Transpose.uml
https://github.com/jagalindo/puzzle-testing/raw/master/downloads/REFMVPC/MVSC/GPLSPLOT/UndirectedWithEdges/UndirecteWithEdges.uml
https://github.com/jagalindo/puzzle-testing/raw/master/downloads/REFMVPC/MVSC/GPLSPLOT/Weighted/Weighted.uml
https://github.com/jagmeet-chaudhary/ECA/raw/master/ECAModelling/ModelDefinition/ECAModelling.uml
https://github.com/jagruti181/clg-project/raw/master/class%20diagram.uml
https://github.com/jagruti181/c

https://github.com/jalvarez54/NorthWind54/raw/master/Main/ModelingProject/ModelDefinition/Package_2200.uml
https://github.com/jalvarez54/NorthWind54/raw/master/Main/ModelingProject/ModelDefinition/Package_2204.uml
https://github.com/jalvarez54/NorthWind54/raw/master/Main/ModelingProject/ModelDefinition/Package_2206.uml
https://github.com/jalvarez54/NorthWind54/raw/master/Main/ModelingProject/ModelDefinition/Package_2209.uml
https://github.com/jalvarez54/NorthWind54/raw/master/Main/ModelingProject/ModelDefinition/Package1_0745.uml
https://github.com/jalvarez54/NorthWind54/raw/master/Main/ModelingProject/ModelDefinition/Package1_2132.uml
https://github.com/jalvarez54/NorthWind54/raw/master/Main/ModelingProject/ModelDefinition/Package1_2200.uml
https://github.com/jalvarez54/NorthWind54/raw/master/Main/ModelingProject/ModelDefinition/Package1_2204.uml
https://github.com/jalvarez54/NorthWind54/raw/master/Main/ModelingProject/ModelDefinition/Package1_2206.uml
https://github.com/jalvarez54/No

https://github.com/janjic/Patterns_PHP/raw/master/Creational/AbstractFactory/Example1/AbstractBookFactory.uml
https://github.com/janjic/Patterns_PHP/raw/master/Creational/AbstractFactory/Example2/ApptEncoder.uml
https://github.com/janjic/Patterns_PHP/raw/master/Creational/AbstractFactory/Example3/FlashWidgetHelper.uml
https://github.com/janjic/Patterns_PHP/raw/master/Creational/Builder/Example1/uml1.uml
https://github.com/janjic/Patterns_PHP/raw/master/Creational/Builder/Example2-Tree/DumbUnbalancedTreeBuilder.uml
https://github.com/janjic/Patterns_PHP/raw/master/Creational/Builder/Example3/Person.uml
https://github.com/janjic/Patterns_PHP/raw/master/Creational/Builder/Iterating%20through%20an%20Interface/Product.uml
https://github.com/janjic/Patterns_PHP/raw/master/Creational/Factory/Example1/SamsPHPBook.uml
https://github.com/janjic/Patterns_PHP/raw/master/Creational/Factory/FactoryMethod/Circle.uml
https://github.com/janjic/Patterns_PHP/raw/master/Creational/Factory/phpfactory-withM

https://github.com/jcamenen/AlpaReserv/raw/master/AlpaReserv/Ressources/diagrammes/diagrammes%20poubelle/SequenceDiagram_modifierReservation.uml
https://github.com/jcamenen/AlpaReserv/raw/master/AlpaReserv/Ressources/diagrammes/diagrammes%20sequence/SequenceDiagram_consultationLoueur.uml
https://github.com/jcamenen/AlpaReserv/raw/master/AlpaReserv/Ressources/diagrammes/diagrammes%20sequence/SequenceDiagram_nouvelleReservation.uml
https://github.com/jcamenen/AlpaReserv/raw/master/AlpaReserv/Ressources/diagrammes/diagrammes%20sequence/SequenceDiagram_supprimerReservation.uml
https://github.com/jcamenen/AlpaReserv/raw/master/AlpaReserv/Ressources/diagrammes/diagrammes%20sequence/SequenceDiagram_VerificationNouvelleReservation.uml
https://github.com/jcamenen/AlpaReserv/raw/master/AlpaReserv/Ressources/diagrammes/diagrammes%20sequence/SequenceDiagram_voirStatistiques.uml
https://github.com/jcamenen/AlpaReserv/raw/master/AlpaReserv/Ressources/UML/UML.uml
https://github.com/jccastrejon/model2

https://github.com/JEMMtastic/Capstone/raw/master/Capstone/ModelingProjectForUML/ModelDefinition/ModelingProjectForUML.uml
https://github.com/JEMMtastic/Capstone/raw/master/Capstone/ModelingProjectForUML/ModelDefinition/Package_2105.uml
https://github.com/JEMMtastic/Capstone/raw/master/Capstone/ModelingProjectForUML/ModelDefinition/Package1_2105.uml
https://github.com/JEMMtastic/Capstone/raw/master/Capstone/ModelingProjectForUML/ModelDefinition/Package2_2105.uml
https://github.com/JEMMtastic/Capstone/raw/master/Capstone/ModelingProjectForUML/ModelDefinition/Package3_2105.uml
https://github.com/JEMMtastic/CapstoneCharity/raw/master/Capstone/ModelingProjectForUML/ModelDefinition/ModelingProjectForUML.uml
https://github.com/JEMMtastic/CapstoneCharity/raw/master/Capstone/ModelingProjectForUML/ModelDefinition/Package_2105.uml
https://github.com/JEMMtastic/CapstoneCharity/raw/master/Capstone/ModelingProjectForUML/ModelDefinition/Package1_2105.uml
https://github.com/JEMMtastic/CapstoneCharity

https://github.com/jgarverick/tetsuo/raw/master/Tetsuo.Model/ModelDefinition/Package_2108.uml
https://github.com/jgarverick/tetsuo/raw/master/Tetsuo.Model/ModelDefinition/Package1_2106.uml
https://github.com/jgarverick/tetsuo/raw/master/Tetsuo.Model/ModelDefinition/Package1_2107.uml
https://github.com/jgarverick/tetsuo/raw/master/Tetsuo.Model/ModelDefinition/Package1_2108.uml
https://github.com/jgarverick/tetsuo/raw/master/Tetsuo.Model/ModelDefinition/Package2_2106.uml
https://github.com/jgarverick/tetsuo/raw/master/Tetsuo.Model/ModelDefinition/Package2_2107.uml
https://github.com/jgarverick/tetsuo/raw/master/Tetsuo.Model/ModelDefinition/Package2_2108.uml
https://github.com/jgarverick/tetsuo/raw/master/Tetsuo.Model/ModelDefinition/Package3_2106.uml
https://github.com/jgarverick/tetsuo/raw/master/Tetsuo.Model/ModelDefinition/Package3_2107.uml
https://github.com/jgarverick/tetsuo/raw/master/Tetsuo.Model/ModelDefinition/Package3_2108.uml
https://github.com/jgarverick/tetsuo/raw/master/Tet

https://github.com/Jiwan/Civilisation/raw/master/Guegant-Lagrange-ProjetPOO/projet/ModelDefinition/Package_1121.uml
https://github.com/Jiwan/Civilisation/raw/master/Guegant-Lagrange-ProjetPOO/projet/ModelDefinition/Package_1303.uml
https://github.com/Jiwan/Civilisation/raw/master/Guegant-Lagrange-ProjetPOO/projet/ModelDefinition/Package_1533.uml
https://github.com/Jiwan/Civilisation/raw/master/Guegant-Lagrange-ProjetPOO/projet/ModelDefinition/Package_1838.uml
https://github.com/Jiwan/Civilisation/raw/master/Guegant-Lagrange-ProjetPOO/projet/ModelDefinition/Package_1848.uml
https://github.com/Jiwan/Civilisation/raw/master/Guegant-Lagrange-ProjetPOO/projet/ModelDefinition/Package_1905.uml
https://github.com/Jiwan/Civilisation/raw/master/Guegant-Lagrange-ProjetPOO/projet/ModelDefinition/Package_1909.uml
https://github.com/Jiwan/Civilisation/raw/master/Guegant-Lagrange-ProjetPOO/projet/ModelDefinition/Package1_0837.uml
https://github.com/Jiwan/Civilisation/raw/master/Guegant-Lagrange-Proje

https://github.com/JohanBeekers/Jue_De_Barricade/raw/master/Jeu_De_Barricade_Eindproject/DKD/ModelDefinition/Package5_1354.uml
https://github.com/JohanBeekers/Jue_De_Barricade/raw/master/Jeu_De_Barricade_Eindproject/DKD/ModelDefinition/Package6_1354.uml
https://github.com/JohanBeekers/Jue_De_Barricade/raw/master/Jeu_De_Barricade_Eindproject/DKD/ModelDefinition/Package7_1354.uml
https://github.com/JohanBeekers/Jue_De_Barricade/raw/master/Jeu_De_Barricade_Eindproject/DKD/ModelDefinition/Package8_1354.uml
https://github.com/JohanBeekers/Jue_De_Barricade/raw/master/Jeu_De_Barricade_Eindproject/DKD/ModelDefinition/Package9_1354.uml
https://github.com/JohanBeekers/Jue_De_Barricade/raw/master/Jeu_De_Barricade_Eindproject/DKD-abstract/ModelDefinition/DKD-abstract.uml
https://github.com/JohanBeekers/Jue_De_Barricade/raw/master/Jeu_De_Barricade_Eindproject/DKD-abstract/ModelDefinition/Package_1203.uml
https://github.com/JohanBeekers/Jue_De_Barricade/raw/master/Jeu_De_Barricade_Eindproject/DKD-ab

https://github.com/jornane/ttm4115-termassignment/raw/master/TTM4115MQTT2014.uml
https://github.com/jornane/ttm4115/raw/master/Dispatcher.uml
https://github.com/jornane/ttm4115/raw/master/FreeTaxiPool.uml
https://github.com/jornane/ttm4115/raw/master/Queue.uml
https://github.com/jornane/ttm4115/raw/master/RequestPool.uml
https://github.com/jornane/ttm4115/raw/master/ShortestRouteCalculator.uml
https://github.com/jornane/ttm4115/raw/master/Taxi.uml
https://github.com/jornane/ttm4115/raw/master/TaxiClient.uml
https://github.com/jornane/ttm4115/raw/master/TaxiDispatcher.uml
https://github.com/jornane/ttm4115/raw/master/TaxiFleet.uml
https://github.com/jornane/ttm4115/raw/master/TaxiSystem.uml
https://github.com/jornane/ttm4115/raw/master/UserClient.uml
https://github.com/josephbirkner/archive-util/raw/master/doku/XML%20Object%20Archive.uml
https://github.com/josericardo-ac/Titulacion/raw/master/Sistema%20Shajobe/Inventario%20in%20out/ModelDefinition/Inventario%20in%20out.uml
https://githu

https://github.com/jtrentes/test/raw/master/My-Project.uml
https://github.com/JTStark/RPG-Cyberpunk/raw/master/UML/Combate/model.uml
https://github.com/JTStark/RPG-Cyberpunk/raw/master/UML/DiagramaFinal/CombatComponent.uml
https://github.com/JTStark/RPG-Cyberpunk/raw/master/UML/DiagramaFinal/Componentes.uml
https://github.com/JTStark/RPG-Cyberpunk/raw/master/UML/DiagramaFinal/model.uml
https://github.com/JTStark/RPG-Cyberpunk/raw/master/UML/Eventos_Randomicos/RandomEvents.uml
https://github.com/JTStark/RPG-Cyberpunk/raw/master/UML/Inventario%20e%20Loja/InventarioeLoja.uml
https://github.com/JTStark/RPG-Cyberpunk/raw/master/UML/Mapa/MainMap.uml
https://github.com/JTStark/RPG-Cyberpunk/raw/master/UML/Menus/model.uml
https://github.com/juananruiz/cuestorus/raw/master/diagrama_clases.uml
https://github.com/juancadavid/papyrus/raw/master/doc/DevelopperDocuments/architecture/CommandStackFramework.uml
https://github.com/juancadavid/papyrus/raw/master/doc/DevelopperDocuments/architecture/papyr

https://github.com/juancadavid/papyrus/raw/master/extraplugins/qompass-designer/org.eclipse.papyrus.qompass.modellibs.core/models/wizardTemplates/QompassSimple.uml
https://github.com/juancadavid/papyrus/raw/master/extraplugins/qompass-designer/tracing/org.eclipse.papyrus.qompass.modellibs.tracing/models/examples/ClientServerTraceTest.uml
https://github.com/juancadavid/papyrus/raw/master/extraplugins/qompass-designer/tracing/org.eclipse.papyrus.qompass.modellibs.tracing/models/examples/ProducerConsumer.uml
https://github.com/juancadavid/papyrus/raw/master/extraplugins/qompass-designer/tracing/org.eclipse.papyrus.qompass.modellibs.tracing/models/examples/SampleOOTraceTest.uml
https://github.com/juancadavid/papyrus/raw/master/extraplugins/qompass-designer/tracing/org.eclipse.papyrus.qompass.modellibs.tracing/models/library/tracing.uml
https://github.com/juancadavid/papyrus/raw/master/extraplugins/req/org.eclipse.papyrus.req.reqif/doc/model/ReqifForDarwin.uml
https://github.com/juancadavid

https://github.com/shock1974/CutterAgentService/raw/master/CutterAgentService/design/HYCutterService.uml
https://github.com/shock1974/qt4_CutterAgent/raw/master/design/HYCutterService.uml
https://github.com/siddharthmodala/EAMS/raw/master/Documentation/Project%20Diagrams/eams.uml
https://github.com/StarUMLZone/StarUML/raw/master/src/staruml/deploy/Samples/StarUML%20Application%20Model.uml
https://github.com/StarUMLZone/StarUML/raw/master/src/staruml/deploy/Samples/UML%20Interchange%20Metamodel%20Abstract%20Syntax.uml
https://github.com/StevenTCramer/WhiteStarUml/raw/master/staruml/deploy/Samples/StarUML%20Application%20Model.uml
https://github.com/StevenTCramer/WhiteStarUml/raw/master/staruml/deploy/Samples/UML%20Interchange%20Metamodel%20Abstract%20Syntax.uml
https://github.com/YudingZhou/kitt/raw/master/doc/kitt.uml



In [15]:
with open('data/uml_extracted_metadata.json', 'w') as fp:
    json.dump(data_dict, fp)

In [42]:
data_dict

{'https://github.com/intel/umf/raw/master/docs/design/LLD.uml': {'classes': ['MetaFile',
   'MetaSource',
   'Property',
   'Set',
   'Item',
   'MetaFileImpl',
   'MetaSourceImpl',
   'PropertyImpl',
   'SetImpl',
   'ItemImpl',
   'ObjectFactory',
   'DataSourceFactory',
   'DataSourceXMP',
   'DataSource',
   'FrameRegion',
   'Time']},
 'https://github.com/1-aarsproeve/1-aarsproeve/raw/master/1aarsproeve/UML/ModelDefinition/Package_1059.uml': {'classes': ['HovedmenuView',
   'Beskeder',
   'VagtplanView',
   'Ansatte',
   'AnsatteView',
   'Stillinger',
   'Vagter',
   'Anmodninger',
   'Ugedage',
   'ViewContext',
   'TableContext']},
 'https://github.com/1-aarsproeve/1-aarsproeve/raw/master/1aarsproeve/UML/ModelDefinition/Package_1205.uml': {'classes': ['Hovedmenu',
   'Login',
   'OpretBruger',
   'OpretVagt',
   'Profil',
   'RedigerVagt',
   'SkrivBesked',
   'Vagtplan',
   'Splash',
   'Anmodninger']},
 'https://github.com/1-aarsproeve/1-aarsproeve/raw/master/1aarsproeve/UML/

# Language detection

Because the dataset contains attributes and classes in lots of languages, we want to identify all the English files that contain attributes and classes, and filter out all the rest. For this, we use FastText.

In [19]:
# Reload data_dict from file
with open('data/uml_extracted_metadata.json') as json_file:
    data_dict = json.load(json_file)

    # Make sure all programmic cases are turned into spaces like "normal" text for better language detection
    cleaned_data = {
        file: {
            key: list(map(lambda x: ' '.join(list(map(lambda data_str: re.sub(r'((?<=[a-z])[A-Z]|(?<!\A)[A-Z](?=[a-z]))', r' \1', data_str.replace('_', ' ').strip()).lower(), x.split(' ')))), value))
            for key, value in metadata.items()
        } 
        for file, metadata in data_dict.items()
    }

In [20]:
cleaned_data

{'https://github.com/intel/umf/raw/master/docs/design/LLD.uml': {'classes': ['meta file',
   'meta source',
   'property',
   'set',
   'item',
   'meta file impl',
   'meta source impl',
   'property impl',
   'set impl',
   'item impl',
   'object factory',
   'data source factory',
   'data source xmp',
   'data source',
   'frame region',
   'time']},
 'https://github.com/1-aarsproeve/1-aarsproeve/raw/master/1aarsproeve/UML/ModelDefinition/Package_1059.uml': {'classes': ['hovedmenu view',
   'beskeder',
   'vagtplan view',
   'ansatte',
   'ansatte view',
   'stillinger',
   'vagter',
   'anmodninger',
   'ugedage',
   'view context',
   'table context']},
 'https://github.com/1-aarsproeve/1-aarsproeve/raw/master/1aarsproeve/UML/ModelDefinition/Package_1205.uml': {'classes': ['hovedmenu',
   'login',
   'opret bruger',
   'opret vagt',
   'profil',
   'rediger vagt',
   'skriv besked',
   'vagtplan',
   'splash',
   'anmodninger']},
 'https://github.com/1-aarsproeve/1-aarsproeve/ra

In [3]:
PRETRAINED_MODEL_PATH = 'data/lid.176.bin'

model = fasttext.load_model(PRETRAINED_MODEL_PATH)



In [187]:
# Approximate the language from the extracted classes and attributes
def get_file_language(metadata):
    # Combine all classes and attributes into one array
    all_metadata = sum(metadata.values(), [])
    
    # Detect language for each word
    predictions = model.predict(all_metadata)
    
    # Specific approach for longer texts due to issue with habit of using English in programming
    def get_field_level_prediction(string):
        # Split into single words to detect on word level
        words = string.split(' ')
        
        # Get single-word predictions
        string_predictions = np.array(model.predict(words))
        
        # Return the appropriate language
        if len(string_predictions) == 1:
            return string_predictions[0][0][0]
        else:
            # Option 1: all identical language identification
            if len(np.unique(string_predictions[0])) == 1:
                return string_predictions[0][0][0]
            
            # Option 2: combination of an English programmatic word with another language, e.g. "hovedmenu view"
            elif len(string_predictions) == 2 and any('__label__en' in label for label in string_predictions):
                return [x for x in map(lambda x: x[0], string_predictions[0]) if x != '__label__en'][0]
            
            # Option 3: majority vote
            else:
                return Counter(map(lambda x: x[0], string_predictions[0])).most_common(1)[0][0]
        
    # Apply to all fields, identify language for all classes and attributes
    predictions = list(map(lambda x: get_field_level_prediction(x), all_metadata))

    # Get the most common language from all attributes
    return Counter(predictions).most_common(1)[0][0]

In [188]:
get_file_language(cleaned_data['https://github.com/intel/umf/raw/master/docs/design/LLD.uml'])

'__label__en'

In [189]:
get_file_language(cleaned_data['https://github.com/1-aarsproeve/1-aarsproeve/raw/master/1aarsproeve/UML/ModelDefinition/Package_1207.uml'])

'__label__da'

In [190]:
labeled_cleaned_data = {
    key: {**value, **{'lang': get_file_language(value)}}
    for key, value in cleaned_data.items()
}

In [192]:
labeled_cleaned_data

{'https://github.com/intel/umf/raw/master/docs/design/LLD.uml': {'classes': ['meta file',
   'meta source',
   'property',
   'set',
   'item',
   'meta file impl',
   'meta source impl',
   'property impl',
   'set impl',
   'item impl',
   'object factory',
   'data source factory',
   'data source xmp',
   'data source',
   'frame region',
   'time'],
  'lang': '__label__en'},
 'https://github.com/1-aarsproeve/1-aarsproeve/raw/master/1aarsproeve/UML/ModelDefinition/Package_1059.uml': {'classes': ['hovedmenu view',
   'beskeder',
   'vagtplan view',
   'ansatte',
   'ansatte view',
   'stillinger',
   'vagter',
   'anmodninger',
   'ugedage',
   'view context',
   'table context'],
  'lang': '__label__da'},
 'https://github.com/1-aarsproeve/1-aarsproeve/raw/master/1aarsproeve/UML/ModelDefinition/Package_1205.uml': {'classes': ['hovedmenu',
   'login',
   'opret bruger',
   'opret vagt',
   'profil',
   'rediger vagt',
   'skriv besked',
   'vagtplan',
   'splash',
   'anmodninger'],


In [193]:
with open('data/uml_extracted_metadata_annotated.json', 'w') as fp:
    json.dump(labeled_cleaned_data, fp)