# Progetto social
## Realizzazione di grafi riguardanti le pubblicazioni degli articoli del dipartimento scientifico
Ricerca tramite piattaforma Scopus di pubblicazioni di articoli scientifici dell'università di udine e analisi dei dati.
### Analisi del dataset
Caricamento dataset e valutazione attendibilità dei paramtri


In [25]:
# Lista delle librerie da controllare
import importlib
import subprocess
import sys

# Dizionario: nome_pip -> nome_import
required_packages = {
    "pandas": "pandas",
    "networkx": "networkx",
    "pyvis": "pyvis",
    "matplotlib": "matplotlib",
    "numpy": "numpy",
    "plotly": "plotly",
    "ipython": "IPython",
    "scipy": "scipy"
}

for pip_name, import_name in required_packages.items():
    try:
        importlib.import_module(import_name)
        print(f"{pip_name} già installato ✅")
    except ImportError:
        print(f"{pip_name} non trovato. Installazione in corso...")
        subprocess.check_call(
            [sys.executable, "-m", "pip", "install", pip_name]
        )
        print(f"{pip_name} installato correttamente ✅")

pandas già installato ✅
networkx già installato ✅
pyvis già installato ✅
matplotlib già installato ✅
numpy già installato ✅
plotly già installato ✅
ipython già installato ✅
scipy già installato ✅


In [26]:
import pandas as pd
import networkx as nx
from pyvis.network import Network
from collections import Counter
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
import numpy as np
import os
from IPython.display import display
import itertools
import plotly.graph_objects as go
import scipy

In [20]:
#caricamento dataset grezzo
df=pd.read_csv('dataset.csv')
df.head()

Unnamed: 0,Authors,Author full names,Author(s) ID,Title,Year,Source title,Volume,Issue,Art. No.,Page start,...,ISBN,CODEN,PubMed ID,Language of Original Document,Abbreviated Source Title,Document Type,Publication Stage,Open Access,Source,EID
0,"Lancia, G.; Dalpasso, M.","Lancia, Giuseppe G. (6701584197); Dalpasso, Ma...",6701584197; 6603897248,Speeding Up Floyd–Warshall’s Algorithm to Comp...,2025,Algorithms,18.0,9.0,560.0,,...,,,,English,Algorithms,Article,Final,All Open Access; Gold Open Access,Scopus,2-s2.0-105017372448
1,"Da Ros, F.; Di Gaspero, L.; Kletzander, L.; La...","Da Ros, Francesca (58134137500); Di Gaspero, L...",58134137500; 6505944235; 57194560339; 57188717...,Dynamic Temperature Control of Simulated Annea...,2025,,,,,184.0,...,9798400714658,,,English,GECCO - Proc. Genetic Evol. Comput. Conf.,Conference paper,Final,,Scopus,2-s2.0-105013077196
2,"Bacchetti, E.; de Nardin, A.; Giannarini, G.; ...","Bacchetti, Emiliano (59995162800); de Nardin, ...",59995162800; 57259162600; 55883317500; 2448058...,A Deep Learning Model Integrating Clinical and...,2025,Cancers,17.0,13.0,2257.0,,...,,,,English,Cancers,Article,Final,All Open Access; Gold Open Access; Green Accep...,Scopus,2-s2.0-105010643815
3,"Ozkilinc, O.; Soler, M.A.; Giannozzi, P.; Apar...","Ozkilinc, Ozge (58138235700); Soler, Miguel A....",58138235700; 8718855800; 7004488307; 572046647...,The Single-Parameter Bragg–Williams Model for ...,2025,International Journal of Molecular Sciences,26.0,3.0,997.0,,...,,,39940766.0,English,Int. J. Mol. Sci.,Article,Final,All Open Access; Gold Open Access; Green Accep...,Scopus,2-s2.0-85217742685
4,"Faletič, S.; Micheli, M.; Pospiec, G.","Faletič, Sergej (56595108900); Micheli, Marisa...",56595108900; 59664609000; 59665606800,Teaching and learning quantum entanglement: a ...,2025,Journal of Physics: Conference Series,2950.0,1.0,12025.0,,...,9788394593742; 9781628905861,,,English,J. Phys. Conf. Ser.,Conference paper,Final,All Open Access; Gold Open Access,Scopus,2-s2.0-85219573912


In [21]:
#Test e analisi per pulizia dataframe
num_articoli = df.shape[0] #numero aritcoli
null_counts = df.isnull().sum() #Conta valori nulli per colonna
zero_counts = (df == 0).sum() # Conta valori pari a 0 per colonna
print(f"Numero di articoli: {num_articoli}")
#Riepilogo dei valori mancanti
missing_summary = pd.DataFrame({
    "null_values": null_counts,
    "zero_values": zero_counts
})
print(missing_summary)

# Soglia: un terzo dei valori
limite = len(df)/3
cols_to_drop = []# Lista delle colonne da eliminare
for col in df.columns:
    num_null = df[col].isna().sum()
    num_zero = 0
    if pd.api.types.is_numeric_dtype(df[col]):
        num_zero = (df[col] == 0).sum()
    unique_vals = df[col].dropna().unique() #toglie le colonne con un solo valore ripetuto
    if len(unique_vals) <= 1:
        cols_to_drop.append(col)
    # Se la somma di nulli + zeri supera limite, togli la colonna
    if (num_null + num_zero) >= limite:
        cols_to_drop.append(col)
df_clean = df.drop(columns=cols_to_drop)
df_clean.to_csv("nuovo_dataset.csv", index=False)
cols_to_drop


Numero di articoli: 143
                               null_values  zero_values
Authors                                  0            0
Author full names                        0            0
Author(s) ID                             0            0
Title                                    0            0
Year                                     0            0
Source title                            20            0
Volume                                  23            0
Issue                                   77            0
Art. No.                                95            0
Page start                              62            0
Page end                                63            0
Page count                               0          143
Cited by                                 0           27
DOI                                     12            0
Link                                     0            0
Affiliations                             0            0
Authors with affiliation

['Issue',
 'Art. No.',
 'Page start',
 'Page end',
 'Page count',
 'Page count',
 'Molecular Sequence Numbers',
 'Molecular Sequence Numbers',
 'Chemicals/CAS',
 'Tradenames',
 'Manufacturers',
 'Funding Details',
 'Funding Texts',
 'Editors',
 'Sponsors',
 'Conference name',
 'Conference date',
 'Conference location',
 'Conference code',
 'ISBN',
 'CODEN',
 'PubMed ID',
 'Language of Original Document',
 'Open Access',
 'Source']

In [22]:
df=pd.read_csv("nuovo_dataset.csv")
num_colonne = df.shape[1]
print("numero di colonne: ", num_colonne)
df.head(46)

numero di colonne:  23


Unnamed: 0,Authors,Author full names,Author(s) ID,Title,Year,Source title,Volume,Cited by,DOI,Link,...,Author Keywords,Index Keywords,References,Correspondence Address,Publisher,ISSN,Abbreviated Source Title,Document Type,Publication Stage,EID
0,"Lancia, G.; Dalpasso, M.","Lancia, Giuseppe G. (6701584197); Dalpasso, Ma...",6701584197; 6603897248,Speeding Up Floyd–Warshall’s Algorithm to Comp...,2025,Algorithms,18,0,10.3390/a18090560,https://www.scopus.com/inward/record.uri?eid=2...,...,all-pairs shortest paths; FastSet data structu...,Computational complexity; Graph algorithms; Al...,"Floyd, Robert W., Algorithm 97: Shortest path,...","G. Lancia; Department of Mathematics, Computer...",Multidisciplinary Digital Publishing Institute...,19994893,Algorithms,Article,Final,2-s2.0-105017372448
1,"Da Ros, F.; Di Gaspero, L.; Kletzander, L.; La...","Da Ros, Francesca (58134137500); Di Gaspero, L...",58134137500; 6505944235; 57194560339; 57188717...,Dynamic Temperature Control of Simulated Annea...,2025,,,0,10.1145/3712256.3726390,https://www.scopus.com/inward/record.uri?eid=2...,...,combinatorial optimization; empirical analysis...,Application programs; Artificial intelligence;...,"Adriaensen, Steven, Fair-share ILS: A simple s...",,"Association for Computing Machinery, Inc",,GECCO - Proc. Genetic Evol. Comput. Conf.,Conference paper,Final,2-s2.0-105013077196
2,"Bacchetti, E.; de Nardin, A.; Giannarini, G.; ...","Bacchetti, Emiliano (59995162800); de Nardin, ...",59995162800; 57259162600; 55883317500; 2448058...,A Deep Learning Model Integrating Clinical and...,2025,Cancers,17,0,10.3390/cancers17132257,https://www.scopus.com/inward/record.uri?eid=2...,...,artificial intelligence; biopsy; magnetic reso...,alpha adrenergic receptor blocking agent; pros...,"Cornford, Philip A., EAU-EANM-ESTRO-ESUR-ISUP-...","G. Giannarini; Urology Unit, University Hospit...",Multidisciplinary Digital Publishing Institute...,20726694,Cancers,Article,Final,2-s2.0-105010643815
3,"Ozkilinc, O.; Soler, M.A.; Giannozzi, P.; Apar...","Ozkilinc, Ozge (58138235700); Soler, Miguel A....",58138235700; 8718855800; 7004488307; 572046647...,The Single-Parameter Bragg–Williams Model for ...,2025,International Journal of Molecular Sciences,26,0,10.3390/ijms26030997,https://www.scopus.com/inward/record.uri?eid=2...,...,binary mixtures; Bragg–Williams; eutectic solv...,deep eutectic solvent; solvent; Article; Bragg...,"Abbott, Andrew P., Deep Eutectic Solvents form...",F. Fogolari; Dipartimento di Scienze Matematic...,Multidisciplinary Digital Publishing Institute...,14220067; 16616596,Int. J. Mol. Sci.,Article,Final,2-s2.0-85217742685
4,"Faletič, S.; Micheli, M.; Pospiec, G.","Faletič, Sergej (56595108900); Micheli, Marisa...",56595108900; 59664609000; 59665606800,Teaching and learning quantum entanglement: a ...,2025,Journal of Physics: Conference Series,2950,0,10.1088/1742-6596/2950/1/012025,https://www.scopus.com/inward/record.uri?eid=2...,...,,Quantum optics; Empirical research; Fundamenta...,"J Phys Conf Ser, (2021); Qtedu Consortium 2021...","S. Faletič; University of Ljubljana, Faculty o...",Institute of Physics,17426588; 17426596,J. Phys. Conf. Ser.,Conference paper,Final,2-s2.0-85219573912
5,"Unal, A.; Michelini, M.; Santi, L.G.","Unal, Aycin (56938060100); Michelini, Marisa (...",56938060100; 23016123800; 35227961200,Teachers' Perspectives and Practices on Teachi...,2025,Journal of Physics: Conference Series,2950,0,10.1088/1742-6596/2950/1/012008,https://www.scopus.com/inward/record.uri?eid=2...,...,,Elementary and middle schools; Method and tech...,"Hadenfeldt, Jan Christoph, Framing students’ p...","A. Ünal; Research Unit in Physics Education, D...",Institute of Physics,17426588; 17426596,J. Phys. Conf. Ser.,Conference paper,Final,2-s2.0-85219548280
6,"Della Rossa, M.; Tanwani, A.","Della Rossa, Matteo (57207046261); Tanwani, An...",57207046261; 26422519500,Converse Lyapunov results for stability of swi...,2025,"ESAIM - Control, Optimisation and Calculus of ...",31,0,10.1051/cocv/2025006,https://www.scopus.com/inward/record.uri?eid=2...,...,Average dwell-time; Converse results; Multiple...,Lyapunov functions; Lyapunov methods; Nonlinea...,"Morse, A. Stephen, Supervisory control of fami...",M. Della Rossa; Dipartimento di Scienze Matema...,EDP Sciences,12928119; 12623377,Control Optimisation Calc. Var.,Article,Final,2-s2.0-85218944729
7,"Soler, M.A.; Yakout, R.B.A.; Ozkilinc, O.; Esp...","Soler, Miguel A. (8718855800); Yakout, Rayyan ...",8718855800; 59506327000; 58138235700; 72023173...,Bluues_cplx: Electrostatics at Protein–Protein...,2025,Molecules,30,1,10.3390/molecules30010159,https://www.scopus.com/inward/record.uri?eid=2...,...,bluues; complex; electrostatics; free energy; ...,ligand; protein; protein binding; chemical phe...,"Nooren, Irene M.A., Diversity of protein-prote...",F. Fogolari; Dipartimento di Scienze Matematic...,Multidisciplinary Digital Publishing Institute...,14203049,Molecules,Article,Final,2-s2.0-85214473493
8,"Bernardini, A.; Giampiccolo, R.; Bozzo, E.; Fo...","Bernardini, Alberto (57078184800); Giampiccolo...",57078184800; 57224829981; 8964011500; 55970349500,Wave Digital Extended Fixed-Point Solvers for ...,2025,IEEE Transactions on Circuits and Systems,72,2,10.1109/TCSI.2024.3488508,https://www.scopus.com/inward/record.uri?eid=2...,...,circuit simulation; extended fixed-point solve...,Bandpass filters; Digital filters; Inverse pro...,"Z̈olzer, Udo, DAFX: Digital Audio Effects: Sec...","A. Bernardini; Politecnico di Milano, Dipartim...",Institute of Electrical and Electronics Engine...,15498328,IEEE Trans. Circuits Syst. Regul. Pap.,Article,Final,2-s2.0-85208596332
9,"Kottavalasa, Y.N.; Battaglia, A.; Bevilacqua, ...","Kottavalasa, Yellam Naidu (60098093600); Batta...",60098093600; 60097745700; 59798921000; 2348601...,Fusion-Based LSTM-GRU-Attention Model for Time...,2025,,,1,10.23919/FUSION65864.2025.11123885,https://www.scopus.com/inward/record.uri?eid=2...,...,Fouling Factor; Fusion Model; Neural Networks;...,Data fusion; Forecasting; Fouling; Industrial ...,"Inverse Heat Conduction and Heat Exchangers, (...",,Institute of Electrical and Electronics Engine...,,"Proc. Int. Conf. Inf. Fusion, FUSION",Conference paper,Final,2-s2.0-105015864943


## Creazione del primo grafo
Creazione del grafo per anno che mostra la relazione delle collaborazioni tra autori delle varie pubblicazioni.

In [23]:
df["authors_list"] = df["Authors"].str.split(",")
df["authors_list"] = df["authors_list"].apply(
    lambda x: [a.strip() for a in x] if isinstance(x, list) else x
)

# assicura che Year sia numerico
df["Year"] = pd.to_numeric(df["Year"], errors="coerce")
grafi_per_anno = {}
for anno, df_anno in df.groupby("Year"):
    G = nx.Graph()
    for authors in df_anno["authors_list"].dropna():
        for a1, a2 in itertools.combinations(authors, 2):
            if G.has_edge(a1, a2):
                G[a1][a2]["weight"] += 1
            else:
                G.add_edge(a1, a2, weight=1)
    grafi_per_anno[anno] = G


In [24]:
## sistemare nomi pallini e archi
pos = nx.circular_layout(
    nx.compose_all(grafi_per_anno.values())
)
def plot_graph_plotly(G):
    edge_x, edge_y = [], []

    for u, v in G.edges():
        x0, y0 = pos[u]
        x1, y1 = pos[v]
        edge_x += [x0, x1, None]
        edge_y += [y0, y1, None]

    edge_trace = go.Scatter(
        x=edge_x,
        y=edge_y,
        mode="lines",
        line=dict(width=1, color="gray"),
        hoverinfo="none"
    )

    node_x, node_y, hover_text = [], [], []

    for node in G.nodes():
        x, y = pos[node]
        node_x.append(x)
        node_y.append(y)
        hover_text.append(
            f"{node}<br>Collaborazioni: {G.degree(node)}"
        )

    node_trace = go.Scatter(
        x=node_x,
        y=node_y,
        mode="markers",
        hoverinfo="text",
        text=hover_text,
        marker=dict(
            size=8,
            color="royalblue",
            line=dict(width=0.5, color="black")
        )
    )

    return edge_trace, node_trace
frames = []

for anno in sorted(grafi_per_anno.keys()):
    edge, node = plot_graph_plotly(grafi_per_anno[anno])
    frames.append(
        go.Frame(
            data=[edge, node],
            name=str(anno)
        )
    )

fig = go.Figure(
    data=frames[0].data,
    frames=frames,
    layout=go.Layout(
        title="Collaborazioni tra autori nel tempo",
        paper_bgcolor="white",
        plot_bgcolor="white",

        xaxis=dict(
            showgrid=False,
            zeroline=False,
            visible=False
        ),
        yaxis=dict(
            showgrid=False,
            zeroline=False,
            visible=False
        ),

        updatemenus=[{
            "type": "buttons",
            "buttons": [{
                "label": "Play",
                "method": "animate",
                "args": [
                    None,
                    {
                        "frame": {"duration": 2000, "redraw": True}, 
                        "transition": {"duration": 500},
                        "fromcurrent": True
                    }
                ]
            }]
        }],

        sliders=[{
            "steps": [
                {
                    "method": "animate",
                    "args": [[f.name]],
                    "label": f.name
                }
                for f in frames
            ]
        }]
    )
)

fig.show()



In [28]:

#Calcolo del layout su TUTTI i nodi (coerenza tra anni)
G_totale = nx.compose_all(grafi_per_anno.values())
pos = nx.spring_layout(G_totale, seed=42)  # layout “a molla”, riproducibile
def plot_graph_plotly(G, pos):
    edge_x, edge_y = [], []
    for u, v in G.edges():
        x0, y0 = pos[u]
        x1, y1 = pos[v]
        edge_x += [x0, x1, None]
        edge_y += [y0, y1, None]
    edge_trace = go.Scatter(
        x=edge_x, y=edge_y,
        mode="lines",
        line=dict(width=1, color="gray"),
        hoverinfo="none"
    )
    node_x, node_y, hover_text = [], [], []
    sizes = []
    for node in G.nodes():
        x, y = pos[node]
        node_x.append(x)
        node_y.append(y)
        hover_text.append(f"{node}<br>Collaborazioni: {G.degree(node)}")
        sizes.append(5 + G.degree(node) * 2)  # dimensione proporzionale al grado
    node_trace = go.Scatter(
        x=node_x, y=node_y,
        mode="markers",
        hoverinfo="text",
        text=hover_text,
        marker=dict(
            size=sizes,
            color="royalblue",
            line=dict(width=0.5, color="black")
        )
    )
    return edge_trace, node_trace
frames = []
for anno in sorted(grafi_per_anno.keys()):
    edge, node = plot_graph_plotly(grafi_per_anno[anno], pos)
    frames.append(go.Frame(
        data=[edge, node],
        name=str(anno),
        layout=go.Layout(title_text=f"Collaborazioni tra autori – {anno}")
    ))
fig = go.Figure(
    data=frames[0].data,
    frames=frames,
    layout=go.Layout(
        title="Collaborazioni tra autori nel tempo",
        paper_bgcolor="white",
        plot_bgcolor="white",
        xaxis=dict(range=[-1.1, 1.1], showgrid=False, zeroline=False, visible=False),
        yaxis=dict(range=[-1.1, 1.1], showgrid=False, zeroline=False, visible=False),
        updatemenus=[{
            "type": "buttons",
            "buttons": [{
                "label": "Play",
                "method": "animate",
                "args": [
                    None,
                    {
                        "frame": {"duration": 2000, "redraw": True},
                        "transition": {"duration": 500},
                        "fromcurrent": True
                    }
                ]
            }]
        }],
        sliders=[{
            "steps": [
                {"method": "animate", "args": [[f.name]], "label": f.name}
                for f in frames
            ]
        }]
    )
)
fig.show()

## Secondo grafo
Creazione del grafo per centralità di ogni anno che mostra la relazione delle collaborazioni tra autori delle varie pubblicazioni con il nodo centrale