In [14]:
%use dataframe

## Обращаемся к VK API

### Подключаем необходимые библиотеки и делаем импорты

In [1]:
@file:DependsOn("io.ktor:ktor-client-core-jvm:2.0.0-beta-1")
@file:DependsOn("io.ktor:ktor-client-cio-jvm:2.0.0-beta-1")

// Понадобится, если захотим использовать Bearer
@file:DependsOn("io.ktor:ktor-client-auth-jvm:2.0.0-beta-1")

// Десериализация в объект класса
@file:DependsOn("io.ktor:ktor-client-content-negotiation-jvm:2.0.0-beta-1")
@file:DependsOn("io.ktor:ktor-serialization-kotlinx-json-jvm:2.0.0-beta-1")
@file:DependsOn("io.ktor:ktor-client-content-negotiation:2.3.7")
@file:DependsOn("io.ktor:ktor-client-content-negotiation-jvm:2.3.7")
@file:DependsOn("io.ktor:ktor-client-okhttp:2.0.0-beta-1")
@file:DependsOn("io.ktor:ktor-client-gson:2.0.0-beta-1")

import io.ktor.client.HttpClient
import io.ktor.client.engine.cio.CIO
import io.ktor.client.plugins.auth.Auth
import io.ktor.client.plugins.auth.providers.BasicAuthCredentials
import io.ktor.client.plugins.auth.providers.basic
import io.ktor.client.request.get
import kotlinx.coroutines.launch
import kotlinx.coroutines.GlobalScope
import kotlinx.serialization.Serializable
import kotlinx.serialization.encodeToString
import kotlinx.serialization.json.Json
import io.ktor.client.plugins.contentnegotiation.*

In [2]:
/**
 * A VK user profile as returned inside the `response` list of a `users.get` call.
 *
 * Every property except [id] is optional and defaults to `null`. kotlinx.serialization
 * treats a property without a default value as required, so without these defaults a
 * profile that omits a field could not be decoded with it.
 *
 * @property id numeric user identifier.
 * @property first_name first name as delivered by the API.
 * @property last_name last name as delivered by the API.
 * @property nickname nickname, when present.
 * @property city city reference (identifier and title), when present.
 * @property country country reference; it reuses the [CityDto] shape.
 * @property home_town home town as free text, when present.
 * @property sex numeric sex code; the rest of this notebook treats 1 as women and 2 as men.
 * @property bdate birth date as text; when it carries a year, the year is its last four characters.
 * @property deactivated deactivation marker, when present.
 */
@Serializable
data class User(
    val id: Int,
    val first_name: String? = null,
    val last_name: String? = null,
    val nickname: String? = null,
    val city: CityDto? = null,
    val country: CityDto? = null,
    val home_town: String? = null,
    val sex: Int? = null,
    val bdate: String? = null,
    val deactivated: String? = null
)

/**
 * Envelope of a `users.get` reply: the `response` field carries the returned users.
 *
 * @property response users contained in the reply.
 */
@Serializable
data class ResponseDTO(val response: List<User>)

/**
 * Identifier/title pair used for both the `city` and the `country` of a [User].
 *
 * [title] defaults to `null` so that an object without a title can still be decoded by
 * kotlinx.serialization, which treats properties without a default value as required.
 *
 * @property id numeric identifier of the city or country.
 * @property title display name, when present.
 */
@Serializable
data class CityDto(
    val id: Int,
    val title: String? = null
)

In [8]:
// VK API access token appended to every request URL; left empty in the notebook.
val accessToken: String = ""

// City identifier used to select users (73 = Krasnoyarsk).
val cityId: Int = 73

In [10]:
// Compiled once instead of on every call: the whole string must end with four digits.
val birthYearSuffixRegex = Regex(""".*\d{4}""")

/**
 * Checks whether a birth-date string ends with a four-digit year.
 *
 * @param bdate birth date text to validate.
 * @return `true` when the entire string matches "anything followed by four digits",
 *         `false` otherwise (including for an empty string).
 */
fun checkYear(bdate: String): Boolean = birthYearSuffixRegex.matches(bdate)

In [5]:
import java.util.Random

/**
 * Downloads VK user profiles in batches and writes the users whose city id equals
 * `cityId` to `Documents/temp.csv`, one comma-separated row per user.
 *
 * The scan starts at a random user id and walks upwards in steps of 300 ids; each step
 * is a single `users.get` request. Rows are written without a header, in this column order:
 * user_id, first_name, last_name, nickname, city_id, city, country_id, country,
 * home_town, year, sex, deactivated. Missing values are written as the text `null`.
 *
 * The HTTP client and the file writer are both closed when the function leaves,
 * whether it finishes normally or with an exception. "Finished!" is printed only on success.
 */
suspend fun makeRequest() {
    // Renders one CSV field. A value containing a comma, a double quote or a line break
    // is wrapped in double quotes with inner quotes doubled, so it cannot shift columns.
    fun csvField(value: Any?): String {
        val text = value.toString()
        val needsQuoting = text.any { it == ',' || it == '"' || it == '\n' || it == '\r' }
        return if (needsQuoting) "\"" + text.replace("\"", "\"\"") + "\"" else text
    }

    val client = HttpClient(CIO) {
        install(ContentNegotiation) {
            gson()
        }
    }

    try {
        val file = File("Documents/temp.csv")
        file.bufferedWriter().use { writer ->
            println(file.absolutePath)

            val oneStep = 300
            val lowerBound = Random().nextInt(799999629)
            for (id in lowerBound..999959629 step oneStep) {
                // One request covers the ids id .. id + oneStep - 1.
                val userIds = (id until id + oneStep).joinToString(separator = ",")
                val resp: ResponseDTO = client.get(
                    "https://api.vk.com/method/users.get?user_ids=$userIds&access_token=" +
                        "$accessToken&v=5.131&" +
                        "fields=city,country,name_case,home_town,nickname,sex,bdate"
                ).body()

                for (user in resp.response) {
                    if (user.city?.id != cityId) continue

                    // The year is the last four characters of a birth date that passes checkYear.
                    val year = user.bdate?.takeIf { checkYear(it) }?.takeLast(4)

                    val row = listOf(
                        user.id, user.first_name, user.last_name, user.nickname,
                        user.city?.id, user.city?.title,
                        user.country?.id, user.country?.title,
                        user.home_town, year, user.sex, user.deactivated
                    ).joinToString(separator = ",") { csvField(it) }

                    writer.write(row)
                    writer.write("\n")
                }
                // Flush after every batch so rows already fetched reach the file.
                writer.flush()
            }
        }
    } finally {
        // Release the client's resources even when a request or a write fails.
        client.close()
    }
    println("Finished!")
}

In [6]:
// Launch the coroutine; `launch` returns its Job without waiting for makeRequest to finish.
GlobalScope.launch { makeRequest() }

StandaloneCoroutine{Completed}@332f25c8

## Данные по Красноярску

In [12]:
// Location of the CSV file with the collected users that is loaded into the dataframe.
val filePath: String = "/Users/dmitrijegorow/Downloads/VK_API/vk_users_krsk.csv"

In [15]:
// Read the CSV at filePath into a dataframe, then evaluate it as the cell result.
val df = DataFrame.readCSV(filePath)
df

In [16]:
// Evaluate the first_name column of df as the cell result.
df.first_name

In [17]:
// Group the rows of df by first_name; the grouping is only evaluated here, not stored.
df.groupBy { first_name }

In [18]:
// Per first name: "total" = number of rows whose first_name is not "DELETED",
// "sex" = median of the sex column within the group.
// The result is sorted by "total" in descending order.
val topNames = df.groupBy { first_name }.aggregate {
    count { first_name != "DELETED" } into "total"
    median { sex } into "sex"
}.sortByDesc("total")

topNames

In [19]:
// How many of the most frequent names to keep for each sex code.
val topK = 10

// First topK rows with sex == 1 followed by the first topK rows with sex == 2.
// topNames is sorted by "total" descending, so these are the most frequent names per sex.
val topNamesAll = listOf(
    topNames.filter { sex == 1 }.take(topK),
    topNames.filter { sex == 2 }.take(topK)
).concat()

// Show the combined table built in this cell (the cell previously re-displayed topNames).
topNamesAll

In [None]:
// Save the dataframe to a file
// NOTE(review): topNamesFilePath is an empty placeholder — set a real path before running this cell.
val topNamesFilePath = ""
topNamesAll.writeCSV(File(topNamesFilePath))

### Исследуем половозрастную структуру

In [20]:
// Inclusive bounds of the birth years kept by ageStructure and spanned by mergeWithFullYears.
val minYear: Int = 1960
val maxYear: Int = 2007

In [21]:
// Builds the per-birth-year table for one sex code.
// Steps: keep rows of df with sex == sexId, drop rows without a year, group by year,
// store in "total" the number of rows whose first_name is not "DELETED" and in "sex" the
// median of sex, sort by year descending, and keep only years within minYear..maxYear (inclusive).
// `year!!` is used in the last filter because rows with a null year were dropped earlier in the chain.
fun ageStructure(sexId: Int) = df.filter { sex == sexId }.drop {year == null}.groupBy { year }.aggregate {
    count { first_name != "DELETED" } into "total"
    median { sex } into "sex"
}.sortByDesc("year").filter { year!! >= minYear && year!! <= maxYear }

In [22]:
// Per-year table for sex code 1, additionally restricted to years strictly greater than minYear.
// NOTE(review): this extra filter removes the minYear row for women only; menStruct is built
// without it — confirm the asymmetry is intended.
val womenStruct = ageStructure(1).filter { year!! > minYear  }

womenStruct

In [23]:
// Extends a per-year table so that it has a row for every year in minYear..maxYear.
//   someStruct - table with "year", "total" and "sex" columns (as produced by ageStructure)
//   sexId      - value written into null cells of nullable Int columns after the join
// Returns the extended table sorted by year in descending order.
fun mergeWithFullYears(someStruct: DataFrame<Any>, sexId: Int): DataFrame<Any> {
    // Helper table: one row per year of the range plus a column of zeros of the same length.
    val year by (minYear..maxYear).toList().toColumn("year")
    val total by List(maxYear - minYear + 1) { 0 }.toColumn("total1")

    val tempDf = listOf(year, total).toDataFrame()
    // Right join on year keeps every row of the helper table, including years absent from someStruct.
    val mergedDf = someStruct.join(tempDf, JoinType.Right) { year }
    // Collapse "total" and "total1" into a single "total": take the first value, or 0 when it is null.
    // NOTE(review): fillNulls below targets every column selected by colsOf<Int?>(), not only sex — confirm.
    val mergedDf1 = mergedDf.merge { "total" and "total1" }
        .by { (if (it[0] != null) it[0] else 0) }
        .into("total")
        .fillNulls { colsOf<Int?>() }.with { sexId } // after the right join, sex has to be filled in
    return mergedDf1.sortByDesc { year }
}

In [24]:
// Evaluate the women's table extended to the full year range (sex code 1); the result is not stored.
mergeWithFullYears(womenStruct, 1)

In [25]:
// Per-year table for sex code 2, exactly as returned by ageStructure.
val menStruct = ageStructure(2)

menStruct

In [26]:
// Make the year range identical for men and women
mergeWithFullYears(menStruct, 2)

In [27]:
// Stack the women's and men's per-year tables as they are (without mergeWithFullYears).
val ageStruct = listOf(womenStruct, menStruct).concat()

ageStruct

In [28]:
// Redeclares ageStruct: stacks the women's and men's tables after each has been passed
// through mergeWithFullYears with its sex code.
val ageStruct = listOf(mergeWithFullYears(womenStruct, 1), mergeWithFullYears(menStruct, 2)).concat()

ageStruct

In [None]:
// Write ageStruct to a CSV file.
// NOTE(review): ageStructFilePath is an empty placeholder — set a real path before running this cell.
val ageStructFilePath = ""
ageStruct.writeCSV(File(ageStructFilePath))